]> git.donarmstrong.com Git - mothur.git/blob - removerarecommand.cpp
added calcs to tree.shared. working on remove.rare command
[mothur.git] / removerarecommand.cpp
1 /*
2  *  removerarecommand.cpp
3  *  mothur
4  *
5  *  Created by westcott on 1/21/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removerarecommand.h"
11 #include "sequence.hpp"
12 #include "groupmap.h"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
15
16 //**********************************************************************************************************************
17 vector<string> RemoveRareCommand::getValidParameters(){ 
18         try {
19                 string Array[] =  {"rabund","sabund", "group", "list", "shared", "nseqs","groups","label","outputdir","inputdir"};
20                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
21                 return myArray;
22         }
23         catch(exception& e) {
24                 m->errorOut(e, "RemoveRareCommand", "getValidParameters");
25                 exit(1);
26         }
27 }
28 //**********************************************************************************************************************
29 RemoveRareCommand::RemoveRareCommand(){ 
30         try {
31                 abort = true;
32                 //initialize outputTypes
33                 vector<string> tempOutNames;
34                 outputTypes["rabund"] = tempOutNames;
35                 outputTypes["sabund"] = tempOutNames;
36                 outputTypes["list"] = tempOutNames;
37                 outputTypes["group"] = tempOutNames;
38                 outputTypes["shared"] = tempOutNames;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "RemoveRareCommand", "RemoveRareCommand");
42                 exit(1);
43         }
44 }
45 //**********************************************************************************************************************
46 vector<string> RemoveRareCommand::getRequiredParameters(){      
47         try {
48                 string Array[] =  {"nseqs"};
49                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
50                 return myArray;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "RemoveRareCommand", "getRequiredParameters");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 vector<string> RemoveRareCommand::getRequiredFiles(){   
59         try {
60                 vector<string> myArray;
61                 return myArray;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "RemoveRareCommand", "getRequiredFiles");
65                 exit(1);
66         }
67 }
68 //**********************************************************************************************************************
69 RemoveRareCommand::RemoveRareCommand(string option)  {
70         try {
71                 abort = false;
72                 allLines = 1;
73                 
74                 //allow user to run help
75                 if(option == "help") { help(); abort = true; }
76                 
77                 else {
78                         //valid paramters for this command
79                         string Array[] =  {"rabund","sabund", "group", "list", "shared", "nseqs","groups","label","outputdir","inputdir"};
80                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
81                         
82                         OptionParser parser(option);
83                         map<string,string> parameters = parser.getParameters();
84                         
85                         ValidParameters validParameter;
86                         map<string,string>::iterator it;
87                         
88                         //check to make sure all parameters are valid for command
89                         for (it = parameters.begin(); it != parameters.end(); it++) { 
90                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
91                         }
92                         
93                         //initialize outputTypes
94                         vector<string> tempOutNames;
95                         outputTypes["rabund"] = tempOutNames;
96                         outputTypes["sabund"] = tempOutNames;
97                         outputTypes["list"] = tempOutNames;
98                         outputTypes["group"] = tempOutNames;
99                         outputTypes["shared"] = tempOutNames;   
100                         
101                         //if the user changes the output directory command factory will send this info to us in the output parameter 
102                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
103                         
104                         //if the user changes the input directory command factory will send this info to us in the output parameter 
105                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
106                         if (inputDir == "not found"){   inputDir = "";          }
107                         else {
108                                 string path;
109                                 it = parameters.find("list");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
115                                 }
116                                 
117                                 it = parameters.find("group");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
123                                 }
124                                 
125                                 it = parameters.find("sabund");
126                                 //user has given a template file
127                                 if(it != parameters.end()){ 
128                                         path = m->hasPath(it->second);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
131                                 }
132                                 
133                                 it = parameters.find("rabund");
134                                 //user has given a template file
135                                 if(it != parameters.end()){ 
136                                         path = m->hasPath(it->second);
137                                         //if the user has not given a path then, add inputdir. else leave path alone.
138                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
139                                 }
140                                 
141                                 it = parameters.find("shared");
142                                 //user has given a template file
143                                 if(it != parameters.end()){ 
144                                         path = m->hasPath(it->second);
145                                         //if the user has not given a path then, add inputdir. else leave path alone.
146                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
147                                 }
148                         }
149                         
150                         
151                         //check for file parameters
152                         listfile = validParameter.validFile(parameters, "list", true);
153                         if (listfile == "not open") { abort = true; }
154                         else if (listfile == "not found") {  listfile = "";  }  
155                         
156                         sabundfile = validParameter.validFile(parameters, "sabund", true);
157                         if (sabundfile == "not open") { abort = true; }
158                         else if (sabundfile == "not found") {  sabundfile = "";  }      
159                         
160                         rabundfile = validParameter.validFile(parameters, "rabund", true);
161                         if (rabundfile == "not open") { abort = true; }
162                         else if (rabundfile == "not found") {  rabundfile = "";  }                              
163                         
164                         groupfile = validParameter.validFile(parameters, "group", true);
165                         if (groupfile == "not open") { abort = true; }
166                         else if (groupfile == "not found") {  groupfile = "";  }        
167                         
168                         sharedfile = validParameter.validFile(parameters, "shared", true);
169                         if (sharedfile == "not open") { abort = true; }
170                         else if (sharedfile == "not found") {  sharedfile = "";  }
171                         
172                         if ((sabundfile == "") && (rabundfile == "")  && (sharedfile == "") && (listfile == ""))  { m->mothurOut("You must provide at least one of the following: rabund, sabund, shared or list."); m->mothurOutEndLine(); abort = true; }
173                         
174                         groups = validParameter.validFile(parameters, "groups", false);                 
175                         if (groups == "not found") { groups = "all"; }
176                         m->splitAtDash(groups, Groups);
177                         
178                         label = validParameter.validFile(parameters, "label", false);                   
179                         if (label == "not found") { label = ""; }
180                         else { 
181                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
182                                 else { allLines = 1;  }
183                         }
184                         
185                         string temp = validParameter.validFile(parameters, "nseqs", false);      
186                         if (temp == "not found") { m->mothurOut("nseqs is a required parameter."); m->mothurOutEndLine(); abort = true; }
187                         else { convert(temp, nseqs); }
188                 }
189                 
190         }
191         catch(exception& e) {
192                 m->errorOut(e, "RemoveRareCommand", "RemoveRareCommand");
193                 exit(1);
194         }
195 }
196 //**********************************************************************************************************************
197
198 void RemoveRareCommand::help(){
199         try {
200                 //m->mothurOut("The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n");
201                 //m->mothurOut("It outputs a file containing the sequences NOT in the .accnos file.\n");
202                 //m->mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n");
203                 //m->mothurOut("The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n");
204                 //m->mothurOut("The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
205                 //m->mothurOut("Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
206                 //m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
207         }
208         catch(exception& e) {
209                 m->errorOut(e, "RemoveRareCommand", "help");
210                 exit(1);
211         }
212 }
213
214 //**********************************************************************************************************************
215
216 int RemoveRareCommand::execute(){
217         try {
218                 
219                 if (abort == true) { return 0; }
220                 
221                 if (m->control_pressed) { return 0; }
222                 
223                 //read through the correct file and output lines you want to keep
224                 if (sabundfile != "")           {               processSabund();        }
225                 if (rabundfile != "")           {               processRabund();        }
226                 if (listfile != "")                     {               processList();          }
227                 //if (sharedfile != "")         {               processShared();        }
228                 
229                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
230                         
231                 if (outputNames.size() != 0) {
232                         m->mothurOutEndLine();
233                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
234                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
235                         m->mothurOutEndLine();
236                 }
237                 
238                 return 0;               
239         }
240         
241         catch(exception& e) {
242                 m->errorOut(e, "RemoveRareCommand", "execute");
243                 exit(1);
244         }
245 }
246
247 //**********************************************************************************************************************
248 int RemoveRareCommand::processList(){
249         try {
250                 string thisOutputDir = outputDir;
251                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
252                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
253                 string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" +  m->getExtension(groupfile);
254                 
255                 ofstream out, outGroup;
256                 m->openOutputFile(outputFileName, out);
257                 
258                 bool wroteSomething = false;
259                 
260                 //you must provide a label because the names in the listfile need to be consistent
261                 string thisLabel = "";
262                 if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
263                 else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
264                 else { thisLabel = *labels.begin(); }
265                 
266                 InputData input(listfile, "list");
267                 ListVector* list = input.getListVector();
268                 
269                 //get first one or the one we want
270                 if (thisLabel != "") {  
271                         //use smart distancing
272                         set<string> userLabels; userLabels.insert(thisLabel);
273                         set<string> processedLabels;
274                         string lastLabel = list->getLabel();
275                         while((list != NULL) && (userLabels.size() != 0)) {
276                                 if(userLabels.count(list->getLabel()) == 1){
277                                         processedLabels.insert(list->getLabel());
278                                         userLabels.erase(list->getLabel());
279                                         break;
280                                 }
281                                 
282                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
283                                         processedLabels.insert(list->getLabel());
284                                         userLabels.erase(list->getLabel());
285                                         delete list;
286                                         list = input.getListVector(lastLabel);
287                                         break;
288                                 }
289                                 lastLabel = list->getLabel();
290                                 delete list;
291                                 list = input.getListVector();
292                         }
293                         if (userLabels.size() != 0) { 
294                                 m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + ".");  m->mothurOutEndLine();
295                                 list = input.getListVector(lastLabel); 
296                         }
297                 }
298                 
299                 //if groupfile is given then use it
300                 GroupMap* groupMap;
301                 if (groupfile != "") { 
302                         groupMap = new GroupMap(groupfile); groupMap->readMap(); 
303                         SharedUtil util;
304                         util.setGroups(Groups, groupMap->namesOfGroups);
305                         m->openOutputFile(outputGroupFileName, outGroup);
306                 }
307                 
308                 
309                 if (list != NULL) {     
310                         //make a new list vector
311                         ListVector newList;
312                         newList.setLabel(list->getLabel());
313                         
314                         //for each bin
315                         for (int i = 0; i < list->getNumBins(); i++) {
316                                 if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); remove(outputGroupFileName.c_str()); } out.close();  remove(outputFileName.c_str());  return 0; }
317                                 
318                                 //parse out names that are in accnos file
319                                 string binnames = list->get(i);
320                                 vector<string> names;
321                                 string saveBinNames = binnames;
322                                 m->splitAtComma(binnames, names);
323                                 
324                                 vector<string> newGroupFile;
325                                 if (groupfile != "") {
326                                         vector<string> newNames;
327                                         saveBinNames = "";
328                                         for(int k = 0; k < names.size(); k++) {
329                                                 string group = groupMap->getGroup(names[k]);
330                                                 
331                                                 if (m->inUsersGroups(group, Groups)) {
332                                                         newGroupFile.push_back(names[k] + "\t" + group); 
333                                                                 
334                                                         newNames.push_back(names[k]);   
335                                                         saveBinNames += names[k] + ",";
336                                                 }
337                                         }
338                                         names = newNames;
339                                         saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
340                                 }
341
342                                 if (names.size() > nseqs) { //keep bin
343                                         newList.push_back(saveBinNames);
344                                         for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }
345                                 }
346                         }
347                         
348                         //print new listvector
349                         if (newList.getNumBins() != 0) {
350                                 wroteSomething = true;
351                                 newList.print(out);
352                         }
353                 }       
354                 
355                 out.close();
356                 if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
357                 
358                 if (wroteSomething == false) {  m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine();  }
359                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
360                 
361                 return 0;
362         }
363         catch(exception& e) {
364                 m->errorOut(e, "RemoveRareCommand", "processList");
365                 exit(1);
366         }
367 }
368 //**********************************************************************************************************************
369 int RemoveRareCommand::processSabund(){
370         try {
371                 string thisOutputDir = outputDir;
372                 if (outputDir == "") {  thisOutputDir += m->hasPath(sabundfile);  }
373                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sabundfile)) + "pick" +  m->getExtension(sabundfile);
374                 outputTypes["sabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
375
376                 ofstream out;
377                 m->openOutputFile(outputFileName, out);
378                 
379                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
380                 InputData input(sabundfile, "sabund");
381                 SAbundVector* sabund = input.getSAbundVector();
382                 string lastLabel = sabund->getLabel();
383                 set<string> processedLabels;
384                 set<string> userLabels = labels;
385                 
386                 while((sabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
387                         
388                         if (m->control_pressed) { delete sabund; out.close(); return 0; }
389                         
390                         if(allLines == 1 || labels.count(sabund->getLabel()) == 1){                     
391                                 
392                                 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
393                                 processedLabels.insert(sabund->getLabel());
394                                 userLabels.erase(sabund->getLabel());
395                                 
396                                 if (sabund->getMaxRank() > nseqs) {
397                                         for(int i = 1; i <=nseqs; i++) {  sabund->set(i, 0); }
398                                 }else { sabund->clear(); }
399                                 
400                                 if (sabund->getNumBins() > 0) { sabund->print(out); }
401                         }
402                         
403                         if ((m->anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
404                                 string saveLabel = sabund->getLabel();
405                                 
406                                 delete sabund;
407                                 sabund = input.getSAbundVector(lastLabel);
408                                 
409                                 m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
410                                 processedLabels.insert(sabund->getLabel());
411                                 userLabels.erase(sabund->getLabel());
412                                 
413                                 if (sabund->getMaxRank() > nseqs) {
414                                         for(int i = 1; i <=nseqs; i++) {  sabund->set(i, 0); }
415                                 }else { sabund->clear(); }
416                                 
417                                 if (sabund->getNumBins() > 0) { sabund->print(out); }
418                                                                 
419                                 //restore real lastlabel to save below
420                                 sabund->setLabel(saveLabel);
421                         }               
422                         
423                         lastLabel = sabund->getLabel();                 
424                         
425                         delete sabund;
426                         sabund = input.getSAbundVector();
427                 }
428                 
429                 if (m->control_pressed) {  out.close(); return 0; }     
430                 
431                 //output error messages about any remaining user labels
432                 set<string>::iterator it;
433                 bool needToRun = false;
434                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
435                         m->mothurOut("Your file does not include the label " + *it); 
436                         if (processedLabels.count(lastLabel) != 1) {
437                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
438                                 needToRun = true;
439                         }else {
440                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
441                         }
442                 }
443                 
444                 //run last label if you need to
445                 if (needToRun == true)  {
446                         if (sabund != NULL) {   delete sabund;  }
447                         sabund = input.getSAbundVector(lastLabel);
448                         
449                         m->mothurOut(sabund->getLabel()); m->mothurOutEndLine();
450                         
451                         if (sabund->getMaxRank() > nseqs) {
452                                 for(int i = 1; i <=nseqs; i++) {  sabund->set(i, 0); }
453                         }else { sabund->clear(); }
454                         
455                         if (sabund->getNumBins() > 0) { sabund->print(out); }
456                         
457                         delete sabund;
458                 }
459                 
460                 return 0;
461         }
462         catch(exception& e) {
463                 m->errorOut(e, "RemoveRareCommand", "processSabund");
464                 exit(1);
465         }
466 }
467 //**********************************************************************************************************************
468 int RemoveRareCommand::processRabund(){
469         try {
470                 string thisOutputDir = outputDir;
471                 if (outputDir == "") {  thisOutputDir += m->hasPath(rabundfile);  }
472                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(rabundfile)) + "pick" +  m->getExtension(rabundfile);
473                 outputTypes["rabund"].push_back(outputFileName); outputNames.push_back(outputFileName);
474                 
475                 ofstream out;
476                 m->openOutputFile(outputFileName, out);
477                 
478                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
479                 InputData input(rabundfile, "rabund");
480                 RAbundVector* rabund = input.getRAbundVector();
481                 string lastLabel = rabund->getLabel();
482                 set<string> processedLabels;
483                 set<string> userLabels = labels;
484                 
485                 while((rabund != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
486                         
487                         if (m->control_pressed) { delete rabund; out.close(); return 0; }
488                         
489                         if(allLines == 1 || labels.count(rabund->getLabel()) == 1){                     
490                                 
491                                 m->mothurOut(rabund->getLabel()); m->mothurOutEndLine();
492                                 processedLabels.insert(rabund->getLabel());
493                                 userLabels.erase(rabund->getLabel());
494                                 
495                                 RAbundVector newRabund; newRabund.setLabel(rabund->getLabel());
496                                 for (int i = 0; i < rabund->getNumBins(); i++) {
497                                         if (rabund->get(i) > nseqs) {
498                                                 newRabund.push_back(rabund->get(i));
499                                         }
500                                 }
501                                 if (newRabund.getNumBins() > 0) { newRabund.print(out); }
502                         }
503                         
504                         if ((m->anyLabelsToProcess(rabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
505                                 string saveLabel = rabund->getLabel();
506                                 
507                                 delete rabund;
508                                 rabund = input.getRAbundVector(lastLabel);
509                                 
510                                 m->mothurOut(rabund->getLabel()); m->mothurOutEndLine();
511                                 processedLabels.insert(rabund->getLabel());
512                                 userLabels.erase(rabund->getLabel());
513                                 
514                                 RAbundVector newRabund; newRabund.setLabel(rabund->getLabel());
515                                 for (int i = 0; i < rabund->getNumBins(); i++) {
516                                         if (rabund->get(i) > nseqs) {
517                                                 newRabund.push_back(rabund->get(i));
518                                         }
519                                 }
520                                 if (newRabund.getNumBins() > 0) { newRabund.print(out); }                               
521                                 
522                                 //restore real lastlabel to save below
523                                 rabund->setLabel(saveLabel);
524                         }               
525                         
526                         lastLabel = rabund->getLabel();                 
527                         
528                         delete rabund;
529                         rabund = input.getRAbundVector();
530                 }
531                 
532                 if (m->control_pressed) {  out.close(); return 0; }     
533                 
534                 //output error messages about any remaining user labels
535                 set<string>::iterator it;
536                 bool needToRun = false;
537                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
538                         m->mothurOut("Your file does not include the label " + *it); 
539                         if (processedLabels.count(lastLabel) != 1) {
540                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
541                                 needToRun = true;
542                         }else {
543                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
544                         }
545                 }
546                 
547                 //run last label if you need to
548                 if (needToRun == true)  {
549                         if (rabund != NULL) {   delete rabund;  }
550                         rabund = input.getRAbundVector(lastLabel);
551                         
552                         m->mothurOut(rabund->getLabel()); m->mothurOutEndLine();
553                         
554                         RAbundVector newRabund; newRabund.setLabel(rabund->getLabel());
555                         for (int i = 0; i < rabund->getNumBins(); i++) {
556                                 if (rabund->get(i) > nseqs) {
557                                         newRabund.push_back(rabund->get(i));
558                                 }
559                         }
560                         if (newRabund.getNumBins() > 0) { newRabund.print(out); }       
561                         
562                         delete rabund;
563                 }
564                 
565                 return 0;
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "RemoveRareCommand", "processSabund");
569                 exit(1);
570         }
571 }
572 //**********************************************************************************************************************
573
574
575
576