]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
fixes while testing
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::getValidParameters(){ 
14         try {
15                 string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "SplitAbundCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 SplitAbundCommand::SplitAbundCommand(){ 
26         try {
27                 abort = true;
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["list"] = tempOutNames;
31                 outputTypes["name"] = tempOutNames;
32                 outputTypes["accnos"] = tempOutNames;
33                 outputTypes["group"] = tempOutNames;
34                 outputTypes["fasta"] = tempOutNames;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 vector<string> SplitAbundCommand::getRequiredParameters(){      
43         try {
44                 string Array[] =  {"fasta","list","name","or"};
45                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
46                 return myArray;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "SplitAbundCommand", "getRequiredParameters");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 vector<string> SplitAbundCommand::getRequiredFiles(){   
55         try {
56                 vector<string> myArray;
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "SplitAbundCommand", "getRequiredFiles");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 SplitAbundCommand::SplitAbundCommand(string option)  {
66         try {
67                 abort = false;
68                 allLines = 1;
69                         
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; }
72                 
73                 else {
74                         //valid paramters for this command
75                         string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
76                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string, string> parameters = parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                         map<string, string>::iterator it;
83                 
84                         //check to make sure all parameters are valid for command
85                         for (it = parameters.begin(); it != parameters.end(); it++) { 
86                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
87                         }
88                         
89                         //initialize outputTypes
90                         vector<string> tempOutNames;
91                         outputTypes["list"] = tempOutNames;
92                         outputTypes["name"] = tempOutNames;
93                         outputTypes["accnos"] = tempOutNames;
94                         outputTypes["group"] = tempOutNames;
95                         outputTypes["fasta"] = tempOutNames;                    
96                                                                                                 
97                         //if the user changes the input directory command factory will send this info to us in the output parameter 
98                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
99                         if (inputDir == "not found"){   inputDir = "";          }
100                         else {
101                                 string path;
102                                 it = parameters.find("list");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
108                                 }
109                                 
110                                 it = parameters.find("group");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
116                                 }
117                                 
118                                 it = parameters.find("fasta");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
124                                 }
125                                 
126                                 it = parameters.find("name");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
132                                 }
133
134                         }
135
136                         
137                         //if the user changes the output directory command factory will send this info to us in the output parameter 
138                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
139
140                         //check for required parameters
141                         listfile = validParameter.validFile(parameters, "list", true);
142                         if (listfile == "not open") { abort = true; }
143                         else if (listfile == "not found") { listfile = ""; }
144                         else{ inputFile = listfile; }   
145                         
146                         namefile = validParameter.validFile(parameters, "name", true);
147                         if (namefile == "not open") { abort = true; }
148                         else if (namefile == "not found") { namefile = ""; }    
149                         else{ inputFile = namefile; }   
150                 
151                         fastafile = validParameter.validFile(parameters, "fasta", true);
152                         if (fastafile == "not open") { abort = true; }
153                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
154                         
155                         groupfile = validParameter.validFile(parameters, "group", true);
156                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
157                         else if (groupfile == "not found") { groupfile = ""; }
158                         else {  
159                                 groupMap = new GroupMap(groupfile);
160                                 
161                                 int error = groupMap->readMap();
162                                 if (error == 1) { abort = true; }
163         
164                         }
165                         
166                         groups = validParameter.validFile(parameters, "groups", false);         
167                         if (groups == "not found") { groups = ""; }
168                         else if (groups == "all") { 
169                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
170                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
171                         }else { 
172                                 m->splitAtDash(groups, Groups);
173                         }
174                         
175                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
176                         
177                         //do you have all files needed
178                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
179                         
180                         //check for optional parameter and set defaults
181                         // ...at some point should added some additional type checking...
182                         label = validParameter.validFile(parameters, "label", false);                   
183                         if (label == "not found") { label = "";  allLines = 1; }
184                         else { 
185                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
186                                 else { allLines = 1;  }
187                         }
188                         
189                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
190                         accnos = m->isTrue(temp); 
191                         
192                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
193                         convert(temp, cutoff); 
194
195                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
196
197                 }
198
199         }
200         catch(exception& e) {
201                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
202                 exit(1);
203         }
204 }
205 //**********************************************************************************************************************
206 void SplitAbundCommand::help(){
207         try {
208                 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
209                 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
210                 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
211                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
212                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
213                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
214                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
215                 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
216                 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
217                 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
218                 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
219                 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
220                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
221
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "SplitAbundCommand", "help");
225                 exit(1);
226         }
227 }
228 //**********************************************************************************************************************
229 SplitAbundCommand::~SplitAbundCommand(){ 
230         if (groupfile != "") {  delete groupMap;  } 
231 }
232 //**********************************************************************************************************************
233 int SplitAbundCommand::execute(){
234         try {
235         
236                 if (abort == true) {    return 0;       }
237                 
238                 if (listfile != "") { //you are using a listfile to determine abundance
239                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
240                 
241                         //remove old files so you can append later....
242                         string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
243                         if (Groups.size() == 0) {
244                                 remove((fileroot + "rare.list").c_str());
245                                 remove((fileroot + "abund.list").c_str());
246                                 
247                                 outputNames.push_back((fileroot + "rare.list"));
248                                 outputNames.push_back((fileroot + "abund.list"));
249                                 outputTypes["list"].push_back((fileroot + "rare.list"));
250                                 outputTypes["list"].push_back((fileroot + "abund.list"));
251                         }else{
252                                 for (int i=0; i<Groups.size(); i++) {
253                                         remove((fileroot + Groups[i] + ".rare.list").c_str());
254                                         remove((fileroot + Groups[i] + ".abund.list").c_str());
255                                         
256                                         outputNames.push_back((fileroot + Groups[i] + ".rare.list"));
257                                         outputNames.push_back((fileroot + Groups[i] + ".abund.list"));
258                                         outputTypes["list"].push_back((fileroot + Groups[i] + ".rare.list"));
259                                         outputTypes["list"].push_back((fileroot + Groups[i] + ".abund.list"));
260                                 }
261                         }
262                         
263                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
264                         set<string> processedLabels;
265                         set<string> userLabels = labels;        
266                         
267                         input = new InputData(listfile, "list");
268                         list = input->getListVector();
269                         string lastLabel = list->getLabel();
270                         
271                         //do you have a namefile or do we need to similate one?
272                         if (namefile != "") {  readNamesFile();         }
273                         else                            { createNameMap(list);  }
274                         
275                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
276                         
277                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
278                         
279                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
280                                 
281                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
282                                                 
283                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
284                                                 splitList(list);
285                                                                                         
286                                                 processedLabels.insert(list->getLabel());
287                                                 userLabels.erase(list->getLabel());
288                                 }
289                                 
290                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
291                                                 string saveLabel = list->getLabel();
292                                                 
293                                                 delete list;
294                                                 list = input->getListVector(lastLabel); //get new list vector to process
295                                                 
296                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
297                                                 splitList(list);
298                                                 
299                                                 processedLabels.insert(list->getLabel());
300                                                 userLabels.erase(list->getLabel());
301                                                 
302                                                 //restore real lastlabel to save below
303                                                 list->setLabel(saveLabel);
304                                 }
305                                 
306                         
307                                 lastLabel = list->getLabel();
308                                         
309                                 delete list;
310                                 list = input->getListVector(); //get new list vector to process
311                         }
312                         
313                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
314                         
315                         //output error messages about any remaining user labels
316                         set<string>::iterator it;
317                         bool needToRun = false;
318                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
319                                 m->mothurOut("Your file does not include the label " + *it); 
320                                 if (processedLabels.count(lastLabel) != 1) {
321                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
322                                         needToRun = true;
323                                 }else {
324                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
325                                 }
326
327                         }
328                         
329                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
330                         
331                         //run last label if you need to
332                         if (needToRun == true)  {
333                                 if (list != NULL) {     delete list;    }
334                                 list = input->getListVector(lastLabel); //get new list vector to process
335                                 
336                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
337                                 splitList(list);                
338                                 
339                                 delete list;
340                         }
341                         
342                         delete input;
343                         
344                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
345                                                                         
346                 }else { //you are using the namefile to determine abundance
347                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
348                         
349                         splitNames(); 
350                         writeNames();
351                         
352                         string tag = "";
353                         if (groupfile != "")                            {  parseGroup(tag);             }
354                         if (accnos)                                                     {  writeAccnos(tag);    }
355                         if (fastafile != "")                            {  parseFasta(tag);             }
356                 }
357
358                 m->mothurOutEndLine();
359                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
360                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
361                 m->mothurOutEndLine();
362                 
363                 return 0;
364         }
365         catch(exception& e) {
366                 m->errorOut(e, "SplitAbundCommand", "execute");
367                 exit(1);
368         }
369 }
370 /**********************************************************************************************************************/
371 int SplitAbundCommand::splitList(ListVector* thisList) {
372         try {
373                 rareNames.clear();
374                 abundNames.clear();
375                 
376                 //get rareNames and abundNames
377                 for (int i = 0; i < thisList->getNumBins(); i++) {
378                         if (m->control_pressed) { return 0; }
379                         
380                         string bin = thisList->get(i);
381                                                 
382                         vector<string> names;
383                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
384                         int size = names.size();
385                                 
386                         if (size <= cutoff) {
387                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
388                         }else{
389                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
390                         }
391                 }//end for
392
393                 writeList(thisList);
394                 
395                 string tag = thisList->getLabel() + ".";
396                 if (groupfile != "")                            {  parseGroup(tag);             }
397                 if (accnos)                                                     {  writeAccnos(tag);    }
398                 if (fastafile != "")                            {  parseFasta(tag);             }
399                 
400                 return 0;
401
402         }
403         catch(exception& e) {
404                 m->errorOut(e, "SplitAbundCommand", "splitList");
405                 exit(1);
406         }
407 }
408 /**********************************************************************************************************************/
409 int SplitAbundCommand::writeList(ListVector* thisList) { 
410         try {
411                 
412                 map<string, ofstream*> filehandles;
413                 
414                 if (Groups.size() == 0) {
415                         SAbundVector* sabund = new SAbundVector();
416                         *sabund = thisList->getSAbundVector();
417                 
418                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
419                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
420                         int numRareBins = 0;
421                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
422                                 if (i > cutoff) { break; }
423                                 numRareBins += sabund->get(i);
424                         }
425                         int numAbundBins = thisList->getNumBins() - numRareBins;
426                         delete sabund;
427
428                         ofstream aout;
429                         ofstream rout;
430                         
431                         string rare = outputDir + m->getRootName(m->getSimpleName(listfile))  + "rare.list";
432                         m->openOutputFileAppend(rare, rout);
433                         //outputNames.push_back(rare);
434                         
435                         string abund = outputDir + m->getRootName(m->getSimpleName(listfile))  + "abund.list";
436                         m->openOutputFileAppend(abund, aout);
437                         //outputNames.push_back(abund);
438
439                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
440                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
441
442                         for (int i = 0; i < thisList->getNumBins(); i++) {
443                                 if (m->control_pressed) { break; }
444                         
445                                 string bin = list->get(i); 
446                         
447                                 int size = m->getNumNames(bin);
448                         
449                                 if (size <= cutoff) {  rout << bin << '\t';  }
450                                 else                            {  aout << bin << '\t'; }
451                         }
452                         
453                         if (rareNames.size() != 0)      { rout << endl; }
454                         if (abundNames.size() != 0) { aout << endl; }
455                         
456                         rout.close();
457                         aout.close();
458                         
459                 }else{ //parse names by abundance and group
460                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
461                         ofstream* temp;
462                         ofstream* temp2;
463                         //map<string, bool> wroteFile;
464                         map<string, ofstream*> filehandles;
465                         map<string, ofstream*>::iterator it3;
466
467                         for (int i=0; i<Groups.size(); i++) {
468                                 temp = new ofstream;
469                                 filehandles[Groups[i]+".rare"] = temp;
470                                 temp2 = new ofstream;
471                                 filehandles[Groups[i]+".abund"] = temp2;
472                                 
473                                 m->openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
474                                 m->openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
475                         }
476                         
477                         map<string, string> groupVector;
478                         map<string, string>::iterator itGroup;
479                         map<string, int> groupNumBins;
480                 
481                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
482                                 groupNumBins[it3->first] = 0;
483                                 groupVector[it3->first] = "";
484                         }
485                 
486                         for (int i = 0; i < thisList->getNumBins(); i++) {
487                                 if (m->control_pressed) { break; }
488                         
489                                 map<string, string> groupBins;
490                                 string bin = list->get(i); 
491                         
492                                 vector<string> names;
493                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
494                         
495                                 //parse bin into list of sequences in each group
496                                 for (int j = 0; j < names.size(); j++) {
497                                         string rareAbund;
498                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
499                                                 rareAbund = ".rare";
500                                         }else{ //you are a abund name
501                                                 rareAbund = ".abund";
502                                         }
503                                         
504                                         string group = groupMap->getGroup(names[j]);
505                                 
506                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
507                                                 itGroup = groupBins.find(group+rareAbund);
508                                                 if(itGroup == groupBins.end()) {
509                                                         groupBins[group+rareAbund] = names[j];  //add first name
510                                                         groupNumBins[group+rareAbund]++;
511                                                 }else{ //add another name
512                                                         groupBins[group+rareAbund] +=  "," + names[j];
513                                                 }
514                                         }else if(group == "not found") {
515                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
516                                         }
517                                 }
518                         
519                         
520                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
521                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
522                                 }
523                         }
524                         
525                         //end list vector
526                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
527                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
528                                 (*(filehandles[it3->first])).close();
529                                 delete it3->second;
530                         }
531                 }
532                 
533                 return 0;
534
535         }
536         catch(exception& e) {
537                 m->errorOut(e, "SplitAbundCommand", "writeList");
538                 exit(1);
539         }
540 }
541 /**********************************************************************************************************************/
542 int SplitAbundCommand::splitNames() { //namefile
543         try {
544                 
545                 rareNames.clear();
546                 abundNames.clear();     
547                         
548                 //open input file
549                 ifstream in;
550                 m->openInputFile(namefile, in);
551                 
552                 while (!in.eof()) {
553                         if (m->control_pressed) { break; }
554                         
555                         string firstCol, secondCol;
556                         in >> firstCol >> secondCol; m->gobble(in);
557                         
558                         nameMap[firstCol] = secondCol;
559                         
560                         int size = m->getNumNames(secondCol);
561                                 
562                         if (size <= cutoff) {
563                                 rareNames.insert(firstCol); 
564                         }else{
565                                 abundNames.insert(firstCol); 
566                         }
567                 }
568                 in.close();
569                                 
570                 return 0;
571
572         }
573         catch(exception& e) {
574                 m->errorOut(e, "SplitAbundCommand", "splitNames");
575                 exit(1);
576         }
577 }
578 /**********************************************************************************************************************/
579 int SplitAbundCommand::readNamesFile() { 
580         try {
581                 //open input file
582                 ifstream in;
583                 m->openInputFile(namefile, in);
584                 
585                 while (!in.eof()) {
586                         if (m->control_pressed) { break; }
587                         
588                         string firstCol, secondCol;
589                         in >> firstCol >> secondCol; m->gobble(in);
590                         
591                         nameMap[firstCol] = secondCol;
592                 }
593                 in.close();
594                                 
595                 return 0;
596
597         }
598         catch(exception& e) {
599                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
600                 exit(1);
601         }
602 }
603 /**********************************************************************************************************************/
604 int SplitAbundCommand::createNameMap(ListVector* thisList) {
605         try {
606                 
607                 if (thisList != NULL) {
608                         for (int i = 0; i < thisList->getNumBins(); i++) {
609                                 if (m->control_pressed) { return 0; }
610                                 
611                                 string bin = thisList->get(i);
612                                                         
613                                 vector<string> names;
614                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
615                                 
616                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
617                         }//end for
618                 }
619                 
620                 return 0;
621         }
622         catch(exception& e) {
623                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
624                 exit(1);
625         }
626 }
627 /**********************************************************************************************************************/
628 int SplitAbundCommand::writeNames() { //namefile
629         try {
630                 
631                 map<string, ofstream*> filehandles;
632
633                 if (Groups.size() == 0) {
634                         ofstream aout;
635                         ofstream rout;
636                         
637                         string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare.names";
638                         m->openOutputFile(rare, rout);
639                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
640                         
641                         string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund.names";
642                         m->openOutputFile(abund, aout);
643                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
644                         
645                         if (rareNames.size() != 0) {
646                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
647                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
648                                 }
649                         }
650                         rout.close();
651                         
652                         if (abundNames.size() != 0) {
653                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
654                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
655                                 }
656                         }
657                         aout.close();
658                         
659                 }else{ //parse names by abundance and group
660                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
661                         ofstream* temp;
662                         ofstream* temp2;
663                         map<string, ofstream*> filehandles;
664                         map<string, ofstream*>::iterator it3;
665
666                         for (int i=0; i<Groups.size(); i++) {
667                                 temp = new ofstream;
668                                 filehandles[Groups[i]+".rare"] = temp;
669                                 temp2 = new ofstream;
670                                 filehandles[Groups[i]+".abund"] = temp2;
671                                 
672                                 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
673                                 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
674                         }
675                         
676                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
677                                 vector<string> names;
678                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
679                                 
680                                 string rareAbund;
681                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
682                                                 rareAbund = ".rare";
683                                 }else{ //you are a abund name
684                                                 rareAbund = ".abund";
685                                 }
686                                 
687                                 map<string, string> outputStrings;
688                                 map<string, string>::iterator itout;
689                                 for (int i = 0; i < names.size(); i++) {
690                                         
691                                         string group = groupMap->getGroup(names[i]);
692                                         
693                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
694                                                 itout = outputStrings.find(group+rareAbund);
695                                                 if (itout == outputStrings.end()) {  
696                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
697                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
698                                         }else if(group == "not found") {
699                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
700                                         }
701                                 }
702                                 
703                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
704                         }
705                         
706                         
707                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
708                                 (*(filehandles[it3->first])).close();
709                                 outputNames.push_back(fileroot + it3->first + ".names");  outputTypes["name"].push_back(fileroot + it3->first + ".names");
710                                 delete it3->second;
711                         }
712                 }
713                                 
714                 return 0;
715
716         }
717         catch(exception& e) {
718                 m->errorOut(e, "SplitAbundCommand", "writeNames");
719                 exit(1);
720         }
721 }
722 /**********************************************************************************************************************/
723 //just write the unique names - if a namesfile is given
724 int SplitAbundCommand::writeAccnos(string tag) { 
725         try {
726                 
727                 map<string, ofstream*> filehandles;
728                 
729                 if (Groups.size() == 0) {
730                         ofstream aout;
731                         ofstream rout;
732                         
733                         
734                         string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare.accnos";
735                         m->openOutputFile(rare, rout);
736                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
737                         
738                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
739                                 rout << (*itRare) << endl;
740                         }
741                         rout.close();
742                 
743                         string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund.accnos";
744                         m->openOutputFile(abund, aout);
745                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
746                         
747                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
748                                 aout << (*itAbund) << endl;
749                         }
750                         aout.close();
751                         
752                 }else{ //parse names by abundance and group
753                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
754                         ofstream* temp;
755                         ofstream* temp2;
756                         map<string, ofstream*> filehandles;
757                         map<string, ofstream*>::iterator it3;
758                         
759                         for (int i=0; i<Groups.size(); i++) {
760                                 temp = new ofstream;
761                                 filehandles[Groups[i]+".rare"] = temp;
762                                 temp2 = new ofstream;
763                                 filehandles[Groups[i]+".abund"] = temp2;
764                                 
765                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
766                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
767                         }
768                         
769                         //write rare
770                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
771                                         string group = groupMap->getGroup(*itRare);
772                                         
773                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
774                                                 *(filehandles[group+".rare"]) << *itRare << endl;
775                                         }
776                         }
777                                 
778                         //write abund   
779                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
780                                         string group = groupMap->getGroup(*itAbund);
781                                         
782                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
783                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
784                                         }
785                         }
786                         
787                         //close files
788                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
789                                 (*(filehandles[it3->first])).close();
790                                 outputNames.push_back(fileroot + tag + it3->first + ".accnos");  outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
791                                 delete it3->second;
792                         }
793                 }
794                                 
795                 return 0;
796
797         }
798         catch(exception& e) {
799                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
800                 exit(1);
801         }
802 }
803 /**********************************************************************************************************************/
804 int SplitAbundCommand::parseGroup(string tag) { //namefile
805         try {
806                 
807                 map<string, ofstream*> filehandles;
808         
809                 if (Groups.size() == 0) {
810                         ofstream aout;
811                         ofstream rout;
812                         
813                         string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare.groups";
814                         m->openOutputFile(rare, rout);
815                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
816                 
817                         string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund.groups";
818                         m->openOutputFile(abund, aout);
819                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
820                         
821                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
822                                 vector<string> names;
823                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
824                                 
825                                 for (int i = 0; i < names.size(); i++) {
826                                 
827                                         string group = groupMap->getGroup(names[i]);
828                                 
829                                         if (group == "not found") { 
830                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
831                                         }else {
832                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
833                                                         rout << names[i] << '\t' << group << endl;
834                                                 }else{ //you are a abund name
835                                                         aout << names[i] << '\t' << group << endl;
836                                                 }
837                                         }
838                                 }
839                         }
840                         
841                         rout.close(); 
842                         aout.close(); 
843
844                 }else{ //parse names by abundance and group
845                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
846                         ofstream* temp;
847                         ofstream* temp2;
848                         map<string, ofstream*> filehandles;
849                         map<string, ofstream*>::iterator it3;
850
851                         for (int i=0; i<Groups.size(); i++) {
852                                 temp = new ofstream;
853                                 filehandles[Groups[i]+".rare"] = temp;
854                                 temp2 = new ofstream;
855                                 filehandles[Groups[i]+".abund"] = temp2;
856                                 
857                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
858                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
859                         }
860                         
861                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
862                                 vector<string> names;
863                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
864                                 
865                                 string rareAbund;
866                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
867                                         rareAbund = ".rare";
868                                 }else{ //you are a abund name
869                                         rareAbund = ".abund";
870                                 }
871                                 
872                                 for (int i = 0; i < names.size(); i++) {
873                                 
874                                         string group = groupMap->getGroup(names[i]);
875                                                                         
876                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
877                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
878                                         }
879                                 }
880                         }
881                         
882                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
883                                 (*(filehandles[it3->first])).close();
884                                 outputNames.push_back(fileroot + tag + it3->first + ".groups");  outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
885                                 delete it3->second;
886                         }
887                 }
888                                 
889                 return 0;
890
891         }
892         catch(exception& e) {
893                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
894                 exit(1);
895         }
896 }
897 /**********************************************************************************************************************/
898 int SplitAbundCommand::parseFasta(string tag) { //namefile
899         try {
900                 
901                 map<string, ofstream*> filehandles;
902                 
903                 if (Groups.size() == 0) {
904                         ofstream aout;
905                         ofstream rout;
906                         
907                         string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare.fasta";
908                         m->openOutputFile(rare, rout);
909                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
910                 
911                         string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund.fasta";
912                         m->openOutputFile(abund, aout);
913                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
914                 
915                         //open input file
916                         ifstream in;
917                         m->openInputFile(fastafile, in);
918         
919                         while (!in.eof()) {
920                                 if (m->control_pressed) { break; }
921                 
922                                 Sequence seq(in); m->gobble(in);
923                                 
924                                 if (seq.getName() != "") { 
925                                         
926                                         map<string, string>::iterator itNames;
927                                         
928                                         itNames = nameMap.find(seq.getName());
929                                         
930                                         if (itNames == nameMap.end()) {
931                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
932                                         }else{
933                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
934                                                         seq.printSequence(rout);
935                                                 }else{ //you are a abund name
936                                                         seq.printSequence(aout);
937                                                 }
938                                         }
939                                 }
940                         }
941                         in.close();
942                         rout.close(); 
943                         aout.close(); 
944
945                 }else{ //parse names by abundance and group
946                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
947                         ofstream* temp;
948                         ofstream* temp2;
949                         map<string, ofstream*> filehandles;
950                         map<string, ofstream*>::iterator it3;
951
952                         for (int i=0; i<Groups.size(); i++) {
953                                 temp = new ofstream;
954                                 filehandles[Groups[i]+".rare"] = temp;
955                                 temp2 = new ofstream;
956                                 filehandles[Groups[i]+".abund"] = temp2;
957                                 
958                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
959                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
960                         }
961                         
962                         //open input file
963                         ifstream in;
964                         m->openInputFile(fastafile, in);
965         
966                         while (!in.eof()) {
967                                 if (m->control_pressed) { break; }
968                 
969                                 Sequence seq(in); m->gobble(in);
970                                 
971                                 if (seq.getName() != "") { 
972                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
973                                         
974                                         if (itNames == nameMap.end()) {
975                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
976                                         }else{
977                                                 vector<string> names;
978                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
979                                 
980                                                 string rareAbund;
981                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
982                                                         rareAbund = ".rare";
983                                                 }else{ //you are a abund name
984                                                         rareAbund = ".abund";
985                                                 }
986                                 
987                                                 for (int i = 0; i < names.size(); i++) {
988                                 
989                                                         string group = groupMap->getGroup(seq.getName());
990                                         
991                                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
992                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
993                                                         }else if(group == "not found") {
994                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
995                                                         }
996                                                 }
997                                         }
998                                 }
999                         }
1000                         in.close();
1001                         
1002                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1003                                 (*(filehandles[it3->first])).close();
1004                                 outputNames.push_back(fileroot + tag + it3->first + ".fasta");  outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
1005                                 delete it3->second;
1006                         }
1007                 }
1008                                 
1009                 return 0;
1010
1011         }
1012         catch(exception& e) {
1013                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1014                 exit(1);
1015         }
1016 }
1017 /**********************************************************************************************************************/
1018