]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
changes while setting up test files
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::setParameters(){      
14         try {           
15                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
17                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
19                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SplitAbundCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SplitAbundCommand::getHelpString(){      
37         try {
38                 string helpString = "";
39                 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
40                 helpString += "The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n";
41                 helpString += "The fasta and a list or name parameter are required, and you must provide a cutoff value.\n";
42                 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
43                 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
44                 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
45                 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
46                 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group.  \n";
47                 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n";
48                 helpString += "If you want .abund and .rare files for all groups, set groups=all.  \n";
49                 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
50                 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "SplitAbundCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59
60 //**********************************************************************************************************************
61 SplitAbundCommand::SplitAbundCommand(){ 
62         try {
63                 abort = true; calledHelp = true; 
64                 setParameters();
65                 vector<string> tempOutNames;
66                 outputTypes["list"] = tempOutNames;
67                 outputTypes["name"] = tempOutNames;
68                 outputTypes["accnos"] = tempOutNames;
69                 outputTypes["group"] = tempOutNames;
70                 outputTypes["fasta"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 SplitAbundCommand::SplitAbundCommand(string option)  {
79         try {
80                 abort = false; calledHelp = false;   
81                 allLines = 1;
82                         
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 
86                 else {
87                         vector<string> myArray = setParameters();
88                         
89                         OptionParser parser(option);
90                         map<string, string> parameters = parser.getParameters();
91                         
92                         ValidParameters validParameter;
93                         map<string, string>::iterator it;
94                 
95                         //check to make sure all parameters are valid for command
96                         for (it = parameters.begin(); it != parameters.end(); it++) { 
97                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
98                         }
99                         
100                         //initialize outputTypes
101                         vector<string> tempOutNames;
102                         outputTypes["list"] = tempOutNames;
103                         outputTypes["name"] = tempOutNames;
104                         outputTypes["accnos"] = tempOutNames;
105                         outputTypes["group"] = tempOutNames;
106                         outputTypes["fasta"] = tempOutNames;                    
107                                                                                                 
108                         //if the user changes the input directory command factory will send this info to us in the output parameter 
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                                 string path;
113                                 it = parameters.find("list");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
119                                 }
120                                 
121                                 it = parameters.find("group");
122                                 //user has given a template file
123                                 if(it != parameters.end()){ 
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
127                                 }
128                                 
129                                 it = parameters.find("fasta");
130                                 //user has given a template file
131                                 if(it != parameters.end()){ 
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
135                                 }
136                                 
137                                 it = parameters.find("name");
138                                 //user has given a template file
139                                 if(it != parameters.end()){ 
140                                         path = m->hasPath(it->second);
141                                         //if the user has not given a path then, add inputdir. else leave path alone.
142                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
143                                 }
144
145                         }
146
147                         
148                         //if the user changes the output directory command factory will send this info to us in the output parameter 
149                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
150
151                         //check for required parameters
152                         listfile = validParameter.validFile(parameters, "list", true);
153                         if (listfile == "not open") { abort = true; }
154                         else if (listfile == "not found") { listfile = ""; }
155                         else{ inputFile = listfile; }   
156                         
157                         namefile = validParameter.validFile(parameters, "name", true);
158                         if (namefile == "not open") { abort = true; }
159                         else if (namefile == "not found") { namefile = ""; }    
160                         else{ inputFile = namefile; }   
161                 
162                         fastafile = validParameter.validFile(parameters, "fasta", true);
163                         if (fastafile == "not open") { abort = true; }
164                         else if (fastafile == "not found") {                            
165                                 fastafile = m->getFastaFile(); 
166                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
167                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
168                         }       
169                         
170                         groupfile = validParameter.validFile(parameters, "group", true);
171                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
172                         else if (groupfile == "not found") { groupfile = ""; }
173                         else {  
174                                 groupMap = new GroupMap(groupfile);
175                                 
176                                 int error = groupMap->readMap();
177                                 if (error == 1) { abort = true; }
178         
179                         }
180                         
181                         groups = validParameter.validFile(parameters, "groups", false);         
182                         if (groups == "not found") { groups = ""; }
183                         else if (groups == "all") { 
184                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
185                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
186                         }else { 
187                                 m->splitAtDash(groups, Groups);
188                         }
189                         
190                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
191                         
192                         //do you have all files needed
193                         if ((listfile == "") && (namefile == "")) { 
194                                 namefile = m->getNameFile(); 
195                                 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
196                                 else {                          
197                                         listfile = m->getListFile(); 
198                                         if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
199                                         else {  m->mothurOut("You have no current list or namefile and the list or name parameter is required."); m->mothurOutEndLine(); abort = true; }
200                                 }
201                         }
202                         
203                         //check for optional parameter and set defaults
204                         // ...at some point should added some additional type checking...
205                         label = validParameter.validFile(parameters, "label", false);                   
206                         if (label == "not found") { label = "";  allLines = 1; }
207                         else { 
208                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
209                                 else { allLines = 1;  }
210                         }
211                         
212                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
213                         accnos = m->isTrue(temp); 
214                         
215                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
216                         convert(temp, cutoff); 
217
218                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
219
220                 }
221
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
225                 exit(1);
226         }
227 }
228 //**********************************************************************************************************************
229 SplitAbundCommand::~SplitAbundCommand(){ 
230         if (groupfile != "") {  delete groupMap;  } 
231 }
232 //**********************************************************************************************************************
233 int SplitAbundCommand::execute(){
234         try {
235         
236                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
237                 
238                 if (listfile != "") { //you are using a listfile to determine abundance
239                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
240                         
241                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
242                         set<string> processedLabels;
243                         set<string> userLabels = labels;        
244                         
245                         input = new InputData(listfile, "list");
246                         list = input->getListVector();
247                         string lastLabel = list->getLabel();
248                         
249                         //do you have a namefile or do we need to similate one?
250                         if (namefile != "") {  readNamesFile();         }
251                         else                            { createNameMap(list);  }
252                         
253                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
254                         
255                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
256                         
257                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
258                                 
259                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
260                                                 
261                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
262                                                 splitList(list);
263                                                                                         
264                                                 processedLabels.insert(list->getLabel());
265                                                 userLabels.erase(list->getLabel());
266                                 }
267                                 
268                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
269                                                 string saveLabel = list->getLabel();
270                                                 
271                                                 delete list;
272                                                 list = input->getListVector(lastLabel); //get new list vector to process
273                                                 
274                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
275                                                 splitList(list);
276                                                 
277                                                 processedLabels.insert(list->getLabel());
278                                                 userLabels.erase(list->getLabel());
279                                                 
280                                                 //restore real lastlabel to save below
281                                                 list->setLabel(saveLabel);
282                                 }
283                                 
284                         
285                                 lastLabel = list->getLabel();
286                                         
287                                 delete list;
288                                 list = input->getListVector(); //get new list vector to process
289                         }
290                         
291                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
292                         
293                         //output error messages about any remaining user labels
294                         set<string>::iterator it;
295                         bool needToRun = false;
296                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
297                                 m->mothurOut("Your file does not include the label " + *it); 
298                                 if (processedLabels.count(lastLabel) != 1) {
299                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
300                                         needToRun = true;
301                                 }else {
302                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
303                                 }
304
305                         }
306                         
307                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
308                         
309                         //run last label if you need to
310                         if (needToRun == true)  {
311                                 if (list != NULL) {     delete list;    }
312                                 list = input->getListVector(lastLabel); //get new list vector to process
313                                 
314                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
315                                 splitList(list);                
316                                 
317                                 delete list;
318                         }
319                         
320                         delete input;
321                         
322                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
323                                                                         
324                 }else { //you are using the namefile to determine abundance
325                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
326                         
327                         splitNames(); 
328                         writeNames();
329                         
330                         string tag = "";
331                         if (groupfile != "")                            {  parseGroup(tag);             }
332                         if (accnos)                                                     {  writeAccnos(tag);    }
333                         if (fastafile != "")                            {  parseFasta(tag);             }
334                 }
335                 
336                 //set fasta file as new current fastafile
337                 string current = "";
338                 itTypes = outputTypes.find("fasta");
339                 if (itTypes != outputTypes.end()) {
340                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
341                 }
342                 
343                 itTypes = outputTypes.find("name");
344                 if (itTypes != outputTypes.end()) {
345                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
346                 }
347                 
348                 itTypes = outputTypes.find("group");
349                 if (itTypes != outputTypes.end()) {
350                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
351                 }
352                 
353                 itTypes = outputTypes.find("list");
354                 if (itTypes != outputTypes.end()) {
355                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
356                 }
357                 
358                 itTypes = outputTypes.find("accnos");
359                 if (itTypes != outputTypes.end()) {
360                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
361                 }
362                 
363                 m->mothurOutEndLine();
364                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
365                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
366                 m->mothurOutEndLine();
367                 
368                 return 0;
369         }
370         catch(exception& e) {
371                 m->errorOut(e, "SplitAbundCommand", "execute");
372                 exit(1);
373         }
374 }
375 /**********************************************************************************************************************/
376 int SplitAbundCommand::splitList(ListVector* thisList) {
377         try {
378                 rareNames.clear();
379                 abundNames.clear();
380                 
381                 //get rareNames and abundNames
382                 for (int i = 0; i < thisList->getNumBins(); i++) {
383                         if (m->control_pressed) { return 0; }
384                         
385                         string bin = thisList->get(i);
386                                                 
387                         vector<string> names;
388                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
389                         int size = names.size();
390                                 
391                         if (size <= cutoff) {
392                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
393                         }else{
394                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
395                         }
396                 }//end for
397
398                 
399                 string tag = thisList->getLabel() + ".";
400                 
401                 writeList(thisList, tag);
402                 
403                 if (groupfile != "")                            {  parseGroup(tag);             }
404                 if (accnos)                                                     {  writeAccnos(tag);    }
405                 if (fastafile != "")                            {  parseFasta(tag);             }
406                 
407                 return 0;
408
409         }
410         catch(exception& e) {
411                 m->errorOut(e, "SplitAbundCommand", "splitList");
412                 exit(1);
413         }
414 }
415 /**********************************************************************************************************************/
416 int SplitAbundCommand::writeList(ListVector* thisList, string tag) { 
417         try {
418                 
419                 map<string, ofstream*> filehandles;
420                 
421                 if (Groups.size() == 0) {
422                         SAbundVector* sabund = new SAbundVector();
423                         *sabund = thisList->getSAbundVector();
424                 
425                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
426                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
427                         int numRareBins = 0;
428                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
429                                 if (i > cutoff) { break; }
430                                 numRareBins += sabund->get(i);
431                         }
432                         int numAbundBins = thisList->getNumBins() - numRareBins;
433                         delete sabund;
434
435                         ofstream aout;
436                         ofstream rout;
437                         
438                         string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
439                         m->openOutputFile(rare, rout);
440                         outputNames.push_back(rare); outputTypes["list"].push_back(rare);
441                         
442                         string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
443                         m->openOutputFile(abund, aout);
444                         outputNames.push_back(abund); outputTypes["list"].push_back(abund);
445
446                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
447                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
448
449                         for (int i = 0; i < thisList->getNumBins(); i++) {
450                                 if (m->control_pressed) { break; }
451                         
452                                 string bin = list->get(i); 
453                         
454                                 int size = m->getNumNames(bin);
455                         
456                                 if (size <= cutoff) {  rout << bin << '\t';  }
457                                 else                            {  aout << bin << '\t'; }
458                         }
459                         
460                         if (rareNames.size() != 0)      { rout << endl; }
461                         if (abundNames.size() != 0) { aout << endl; }
462                         
463                         rout.close();
464                         aout.close();
465                         
466                 }else{ //parse names by abundance and group
467                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
468                         ofstream* temp;
469                         ofstream* temp2;
470                         //map<string, bool> wroteFile;
471                         map<string, ofstream*> filehandles;
472                         map<string, ofstream*>::iterator it3;
473
474                         for (int i=0; i<Groups.size(); i++) {
475                                 temp = new ofstream;
476                                 filehandles[Groups[i]+".rare"] = temp;
477                                 temp2 = new ofstream;
478                                 filehandles[Groups[i]+".abund"] = temp2;
479                                 
480                                 m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
481                                 m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
482                                 outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
483                                 outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
484                         }
485                         
486                         map<string, string> groupVector;
487                         map<string, string>::iterator itGroup;
488                         map<string, int> groupNumBins;
489                 
490                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
491                                 groupNumBins[it3->first] = 0;
492                                 groupVector[it3->first] = "";
493                         }
494                 
495                         for (int i = 0; i < thisList->getNumBins(); i++) {
496                                 if (m->control_pressed) { break; }
497                         
498                                 map<string, string> groupBins;
499                                 string bin = list->get(i); 
500                         
501                                 vector<string> names;
502                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
503                         
504                                 //parse bin into list of sequences in each group
505                                 for (int j = 0; j < names.size(); j++) {
506                                         string rareAbund;
507                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
508                                                 rareAbund = ".rare";
509                                         }else{ //you are a abund name
510                                                 rareAbund = ".abund";
511                                         }
512                                         
513                                         string group = groupMap->getGroup(names[j]);
514                                 
515                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
516                                                 itGroup = groupBins.find(group+rareAbund);
517                                                 if(itGroup == groupBins.end()) {
518                                                         groupBins[group+rareAbund] = names[j];  //add first name
519                                                         groupNumBins[group+rareAbund]++;
520                                                 }else{ //add another name
521                                                         groupBins[group+rareAbund] +=  "," + names[j];
522                                                 }
523                                         }else if(group == "not found") {
524                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
525                                         }
526                                 }
527                         
528                         
529                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
530                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
531                                 }
532                         }
533                         
534                         //end list vector
535                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
536                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
537                                 (*(filehandles[it3->first])).close();
538                                 delete it3->second;
539                         }
540                 }
541                 
542                 return 0;
543
544         }
545         catch(exception& e) {
546                 m->errorOut(e, "SplitAbundCommand", "writeList");
547                 exit(1);
548         }
549 }
550 /**********************************************************************************************************************/
551 int SplitAbundCommand::splitNames() { //namefile
552         try {
553                 
554                 rareNames.clear();
555                 abundNames.clear();     
556                         
557                 //open input file
558                 ifstream in;
559                 m->openInputFile(namefile, in);
560                 
561                 while (!in.eof()) {
562                         if (m->control_pressed) { break; }
563                         
564                         string firstCol, secondCol;
565                         in >> firstCol >> secondCol; m->gobble(in);
566                         
567                         nameMap[firstCol] = secondCol;
568                         
569                         int size = m->getNumNames(secondCol);
570                                 
571                         if (size <= cutoff) {
572                                 rareNames.insert(firstCol); 
573                         }else{
574                                 abundNames.insert(firstCol); 
575                         }
576                 }
577                 in.close();
578                                 
579                 return 0;
580
581         }
582         catch(exception& e) {
583                 m->errorOut(e, "SplitAbundCommand", "splitNames");
584                 exit(1);
585         }
586 }
587 /**********************************************************************************************************************/
588 int SplitAbundCommand::readNamesFile() { 
589         try {
590                 //open input file
591                 ifstream in;
592                 m->openInputFile(namefile, in);
593                 
594                 while (!in.eof()) {
595                         if (m->control_pressed) { break; }
596                         
597                         string firstCol, secondCol;
598                         in >> firstCol >> secondCol; m->gobble(in);
599                         
600                         nameMap[firstCol] = secondCol;
601                 }
602                 in.close();
603                                 
604                 return 0;
605
606         }
607         catch(exception& e) {
608                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
609                 exit(1);
610         }
611 }
612 /**********************************************************************************************************************/
613 int SplitAbundCommand::createNameMap(ListVector* thisList) {
614         try {
615                 
616                 if (thisList != NULL) {
617                         for (int i = 0; i < thisList->getNumBins(); i++) {
618                                 if (m->control_pressed) { return 0; }
619                                 
620                                 string bin = thisList->get(i);
621                                                         
622                                 vector<string> names;
623                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
624                                 
625                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
626                         }//end for
627                 }
628                 
629                 return 0;
630         }
631         catch(exception& e) {
632                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
633                 exit(1);
634         }
635 }
636 /**********************************************************************************************************************/
637 int SplitAbundCommand::writeNames() { //namefile
638         try {
639                 
640                 map<string, ofstream*> filehandles;
641
642                 if (Groups.size() == 0) {
643                         ofstream aout;
644                         ofstream rout;
645                         
646                         string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare.names";
647                         m->openOutputFile(rare, rout);
648                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
649                         
650                         string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund.names";
651                         m->openOutputFile(abund, aout);
652                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
653                         
654                         if (rareNames.size() != 0) {
655                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
656                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
657                                 }
658                         }
659                         rout.close();
660                         
661                         if (abundNames.size() != 0) {
662                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
663                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
664                                 }
665                         }
666                         aout.close();
667                         
668                 }else{ //parse names by abundance and group
669                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
670                         ofstream* temp;
671                         ofstream* temp2;
672                         map<string, ofstream*> filehandles;
673                         map<string, ofstream*>::iterator it3;
674
675                         for (int i=0; i<Groups.size(); i++) {
676                                 temp = new ofstream;
677                                 filehandles[Groups[i]+".rare"] = temp;
678                                 temp2 = new ofstream;
679                                 filehandles[Groups[i]+".abund"] = temp2;
680                                 
681                                 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
682                                 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
683                         }
684                         
685                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
686                                 vector<string> names;
687                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
688                                 
689                                 string rareAbund;
690                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
691                                                 rareAbund = ".rare";
692                                 }else{ //you are a abund name
693                                                 rareAbund = ".abund";
694                                 }
695                                 
696                                 map<string, string> outputStrings;
697                                 map<string, string>::iterator itout;
698                                 for (int i = 0; i < names.size(); i++) {
699                                         
700                                         string group = groupMap->getGroup(names[i]);
701                                         
702                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
703                                                 itout = outputStrings.find(group+rareAbund);
704                                                 if (itout == outputStrings.end()) {  
705                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
706                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
707                                         }else if(group == "not found") {
708                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
709                                         }
710                                 }
711                                 
712                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
713                         }
714                         
715                         
716                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
717                                 (*(filehandles[it3->first])).close();
718                                 outputNames.push_back(fileroot + it3->first + ".names");  outputTypes["name"].push_back(fileroot + it3->first + ".names");
719                                 delete it3->second;
720                         }
721                 }
722                                 
723                 return 0;
724
725         }
726         catch(exception& e) {
727                 m->errorOut(e, "SplitAbundCommand", "writeNames");
728                 exit(1);
729         }
730 }
731 /**********************************************************************************************************************/
732 //just write the unique names - if a namesfile is given
733 int SplitAbundCommand::writeAccnos(string tag) { 
734         try {
735                 
736                 map<string, ofstream*> filehandles;
737                 
738                 if (Groups.size() == 0) {
739                         ofstream aout;
740                         ofstream rout;
741                         
742                         
743                         string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare.accnos";
744                         m->openOutputFile(rare, rout);
745                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
746                         
747                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
748                                 rout << (*itRare) << endl;
749                         }
750                         rout.close();
751                 
752                         string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund.accnos";
753                         m->openOutputFile(abund, aout);
754                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
755                         
756                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
757                                 aout << (*itAbund) << endl;
758                         }
759                         aout.close();
760                         
761                 }else{ //parse names by abundance and group
762                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
763                         ofstream* temp;
764                         ofstream* temp2;
765                         map<string, ofstream*> filehandles;
766                         map<string, ofstream*>::iterator it3;
767                         
768                         for (int i=0; i<Groups.size(); i++) {
769                                 temp = new ofstream;
770                                 filehandles[Groups[i]+".rare"] = temp;
771                                 temp2 = new ofstream;
772                                 filehandles[Groups[i]+".abund"] = temp2;
773                                 
774                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
775                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
776                         }
777                         
778                         //write rare
779                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
780                                         string group = groupMap->getGroup(*itRare);
781                                         
782                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
783                                                 *(filehandles[group+".rare"]) << *itRare << endl;
784                                         }
785                         }
786                                 
787                         //write abund   
788                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
789                                         string group = groupMap->getGroup(*itAbund);
790                                         
791                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
792                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
793                                         }
794                         }
795                         
796                         //close files
797                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
798                                 (*(filehandles[it3->first])).close();
799                                 outputNames.push_back(fileroot + tag + it3->first + ".accnos");  outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
800                                 delete it3->second;
801                         }
802                 }
803                                 
804                 return 0;
805
806         }
807         catch(exception& e) {
808                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
809                 exit(1);
810         }
811 }
812 /**********************************************************************************************************************/
813 int SplitAbundCommand::parseGroup(string tag) { //namefile
814         try {
815                 
816                 map<string, ofstream*> filehandles;
817         
818                 if (Groups.size() == 0) {
819                         ofstream aout;
820                         ofstream rout;
821                         
822                         string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare.groups";
823                         m->openOutputFile(rare, rout);
824                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
825                 
826                         string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund.groups";
827                         m->openOutputFile(abund, aout);
828                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
829                         
830                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
831                                 vector<string> names;
832                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
833                                 
834                                 for (int i = 0; i < names.size(); i++) {
835                                 
836                                         string group = groupMap->getGroup(names[i]);
837                                 
838                                         if (group == "not found") { 
839                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
840                                         }else {
841                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
842                                                         rout << names[i] << '\t' << group << endl;
843                                                 }else{ //you are a abund name
844                                                         aout << names[i] << '\t' << group << endl;
845                                                 }
846                                         }
847                                 }
848                         }
849                         
850                         rout.close(); 
851                         aout.close(); 
852
853                 }else{ //parse names by abundance and group
854                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
855                         ofstream* temp;
856                         ofstream* temp2;
857                         map<string, ofstream*> filehandles;
858                         map<string, ofstream*>::iterator it3;
859
860                         for (int i=0; i<Groups.size(); i++) {
861                                 temp = new ofstream;
862                                 filehandles[Groups[i]+".rare"] = temp;
863                                 temp2 = new ofstream;
864                                 filehandles[Groups[i]+".abund"] = temp2;
865                                 
866                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
867                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
868                         }
869                         
870                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
871                                 vector<string> names;
872                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
873                                 
874                                 string rareAbund;
875                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
876                                         rareAbund = ".rare";
877                                 }else{ //you are a abund name
878                                         rareAbund = ".abund";
879                                 }
880                                 
881                                 for (int i = 0; i < names.size(); i++) {
882                                 
883                                         string group = groupMap->getGroup(names[i]);
884                                                                         
885                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
886                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
887                                         }
888                                 }
889                         }
890                         
891                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
892                                 (*(filehandles[it3->first])).close();
893                                 outputNames.push_back(fileroot + tag + it3->first + ".groups");  outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
894                                 delete it3->second;
895                         }
896                 }
897                                 
898                 return 0;
899
900         }
901         catch(exception& e) {
902                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
903                 exit(1);
904         }
905 }
906 /**********************************************************************************************************************/
907 int SplitAbundCommand::parseFasta(string tag) { //namefile
908         try {
909                 
910                 map<string, ofstream*> filehandles;
911                 
912                 if (Groups.size() == 0) {
913                         ofstream aout;
914                         ofstream rout;
915                         
916                         string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare.fasta";
917                         m->openOutputFile(rare, rout);
918                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
919                 
920                         string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund.fasta";
921                         m->openOutputFile(abund, aout);
922                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
923                 
924                         //open input file
925                         ifstream in;
926                         m->openInputFile(fastafile, in);
927         
928                         while (!in.eof()) {
929                                 if (m->control_pressed) { break; }
930                 
931                                 Sequence seq(in); m->gobble(in);
932                                 
933                                 if (seq.getName() != "") { 
934                                         
935                                         map<string, string>::iterator itNames;
936                                         
937                                         itNames = nameMap.find(seq.getName());
938                                         
939                                         if (itNames == nameMap.end()) {
940                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
941                                         }else{
942                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
943                                                         seq.printSequence(rout);
944                                                 }else{ //you are a abund name
945                                                         seq.printSequence(aout);
946                                                 }
947                                         }
948                                 }
949                         }
950                         in.close();
951                         rout.close(); 
952                         aout.close(); 
953
954                 }else{ //parse names by abundance and group
955                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
956                         ofstream* temp;
957                         ofstream* temp2;
958                         map<string, ofstream*> filehandles;
959                         map<string, ofstream*>::iterator it3;
960
961                         for (int i=0; i<Groups.size(); i++) {
962                                 temp = new ofstream;
963                                 filehandles[Groups[i]+".rare"] = temp;
964                                 temp2 = new ofstream;
965                                 filehandles[Groups[i]+".abund"] = temp2;
966                                 
967                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
968                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
969                         }
970                         
971                         //open input file
972                         ifstream in;
973                         m->openInputFile(fastafile, in);
974         
975                         while (!in.eof()) {
976                                 if (m->control_pressed) { break; }
977                 
978                                 Sequence seq(in); m->gobble(in);
979                                 
980                                 if (seq.getName() != "") { 
981                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
982                                         
983                                         if (itNames == nameMap.end()) {
984                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
985                                         }else{
986                                                 vector<string> names;
987                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
988                                 
989                                                 string rareAbund;
990                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
991                                                         rareAbund = ".rare";
992                                                 }else{ //you are a abund name
993                                                         rareAbund = ".abund";
994                                                 }
995                                 
996                                                 for (int i = 0; i < names.size(); i++) {
997                                 
998                                                         string group = groupMap->getGroup(seq.getName());
999                                         
1000                                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1001                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
1002                                                         }else if(group == "not found") {
1003                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1004                                                         }
1005                                                 }
1006                                         }
1007                                 }
1008                         }
1009                         in.close();
1010                         
1011                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1012                                 (*(filehandles[it3->first])).close();
1013                                 outputNames.push_back(fileroot + tag + it3->first + ".fasta");  outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
1014                                 delete it3->second;
1015                         }
1016                 }
1017                                 
1018                 return 0;
1019
1020         }
1021         catch(exception& e) {
1022                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1023                 exit(1);
1024         }
1025 }
1026 /**********************************************************************************************************************/
1027