]> git.donarmstrong.com Git - mothur.git/blob - removeseqscommand.cpp
added count file to trim.seqs, get.groups, get.lineage, get.seqs, heatmap.sim, list...
[mothur.git] / removeseqscommand.cpp
1 /*
2  *  removeseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "removeseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> RemoveSeqsCommand::setParameters(){      
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
19         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
21                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
23                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
24                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
25                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
26                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "RemoveSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string RemoveSeqsCommand::getHelpString(){      
41         try {
42                 string helpString = "";
43                 helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
44                 helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
45                 helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n";
46                 helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
47                 helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
48                 helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
49                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "RemoveSeqsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string RemoveSeqsCommand::getOutputFileNameTag(string type, string inputName=""){       
59         try {
60         string outputFileName = "";
61                 map<string, vector<string> >::iterator it;
62         
63         //is this a type this command creates
64         it = outputTypes.find(type);
65         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
66         else {
67             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
68             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
69             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
70             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
71             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
72             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
73             else if (type == "alignreport") {   outputFileName =  "pick.align.report";                   }
74             else if (type == "count")       {   outputFileName =  "pick.count.table";   }
75             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
76         }
77         return outputFileName;
78         }
79         catch(exception& e) {
80                 m->errorOut(e, "RemoveSeqsCommand", "getOutputFileNameTag");
81                 exit(1);
82         }
83 }
84
85 //**********************************************************************************************************************
86 RemoveSeqsCommand::RemoveSeqsCommand(){ 
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["fasta"] = tempOutNames;
92                 outputTypes["taxonomy"] = tempOutNames;
93                 outputTypes["name"] = tempOutNames;
94                 outputTypes["group"] = tempOutNames;
95                 outputTypes["alignreport"] = tempOutNames;
96                 outputTypes["list"] = tempOutNames;
97                 outputTypes["qfile"] = tempOutNames;
98         outputTypes["count"] = tempOutNames;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
107         try {
108                 abort = false; calledHelp = false;   
109                 
110                 //allow user to run help
111                 if(option == "help") { help(); abort = true; calledHelp = true; }
112                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113                 
114                 else {
115                         vector<string> myArray = setParameters();
116                         
117                         OptionParser parser(option);
118                         map<string,string> parameters = parser.getParameters();
119                         
120                         ValidParameters validParameter;
121                         map<string,string>::iterator it;
122                         
123                         //check to make sure all parameters are valid for command
124                         for (it = parameters.begin(); it != parameters.end(); it++) { 
125                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
126                         }
127                         
128                         //initialize outputTypes
129                         vector<string> tempOutNames;
130                         outputTypes["fasta"] = tempOutNames;
131                         outputTypes["taxonomy"] = tempOutNames;
132                         outputTypes["name"] = tempOutNames;
133                         outputTypes["group"] = tempOutNames;
134                         outputTypes["alignreport"] = tempOutNames;
135                         outputTypes["list"] = tempOutNames;
136                         outputTypes["qfile"] = tempOutNames;
137             outputTypes["count"] = tempOutNames;
138                         
139                         //if the user changes the output directory command factory will send this info to us in the output parameter 
140                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
141                         
142                         //if the user changes the input directory command factory will send this info to us in the output parameter 
143                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
144                         if (inputDir == "not found"){   inputDir = "";          }
145                         else {
146                                 string path;
147                                 it = parameters.find("alignreport");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
153                                 }
154                                 
155                                 it = parameters.find("fasta");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
161                                 }
162                                 
163                                 it = parameters.find("accnos");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
169                                 }
170                                 
171                                 it = parameters.find("list");
172                                 //user has given a template file
173                                 if(it != parameters.end()){ 
174                                         path = m->hasPath(it->second);
175                                         //if the user has not given a path then, add inputdir. else leave path alone.
176                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
177                                 }
178                                 
179                                 it = parameters.find("name");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
185                                 }
186                                 
187                                 it = parameters.find("group");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
193                                 }
194                                 
195                                 it = parameters.find("taxonomy");
196                                 //user has given a template file
197                                 if(it != parameters.end()){ 
198                                         path = m->hasPath(it->second);
199                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
201                                 }
202                                 
203                                 it = parameters.find("qfile");
204                                 //user has given a template file
205                                 if(it != parameters.end()){ 
206                                         path = m->hasPath(it->second);
207                                         //if the user has not given a path then, add inputdir. else leave path alone.
208                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
209                                 }
210                 
211                 it = parameters.find("count");
212                                 //user has given a template file
213                                 if(it != parameters.end()){ 
214                                         path = m->hasPath(it->second);
215                                         //if the user has not given a path then, add inputdir. else leave path alone.
216                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
217                                 }
218                         }
219
220                         
221                         //check for required parameters
222                         accnosfile = validParameter.validFile(parameters, "accnos", true);
223                         if (accnosfile == "not open") { abort = true; }
224                         else if (accnosfile == "not found") {  
225                                 accnosfile = m->getAccnosFile(); 
226                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
227                                 else { 
228                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
229                                         abort = true;
230                                 }  
231                         }else { m->setAccnosFile(accnosfile); } 
232                         
233                         fastafile = validParameter.validFile(parameters, "fasta", true);
234                         if (fastafile == "not open") { fastafile = ""; abort = true; }
235                         else if (fastafile == "not found") {  fastafile = "";  }        
236                         else { m->setFastaFile(fastafile); }
237                                                                    
238                         namefile = validParameter.validFile(parameters, "name", true);
239                         if (namefile == "not open") { namefile = ""; abort = true; }
240                         else if (namefile == "not found") {  namefile = "";  }  
241                         else { m->setNameFile(namefile); } 
242                                                                    
243                         groupfile = validParameter.validFile(parameters, "group", true);
244                         if (groupfile == "not open") { abort = true; }
245                         else if (groupfile == "not found") {  groupfile = "";  }
246                         else { m->setGroupFile(groupfile); }
247                         
248                         alignfile = validParameter.validFile(parameters, "alignreport", true);
249                         if (alignfile == "not open") { abort = true; }
250                         else if (alignfile == "not found") {  alignfile = "";  }
251                         
252                         listfile = validParameter.validFile(parameters, "list", true);
253                         if (listfile == "not open") { abort = true; }
254                         else if (listfile == "not found") {  listfile = "";  }
255                         else { m->setListFile(listfile); }
256                         
257                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
258                         if (taxfile == "not open") { abort = true; }
259                         else if (taxfile == "not found") {  taxfile = "";  }
260                         else { m->setTaxonomyFile(taxfile); }
261                         
262                         qualfile = validParameter.validFile(parameters, "qfile", true);
263                         if (qualfile == "not open") { abort = true; }
264                         else if (qualfile == "not found") {  qualfile = "";  }                  
265                         else { m->setQualFile(qualfile); }
266                         
267                         string usedDups = "true";
268                         string temp = validParameter.validFile(parameters, "dups", false);      
269                         if (temp == "not found") { 
270                                 if (namefile != "") {  temp = "true";                                   }
271                                 else                            {  temp = "false"; usedDups = "";       }
272                         }
273                         dups = m->isTrue(temp);
274             
275             countfile = validParameter.validFile(parameters, "count", true);
276             if (countfile == "not open") { countfile = ""; abort = true; }
277             else if (countfile == "not found") { countfile = "";  }     
278             else { m->setCountTableFile(countfile); }
279             
280             if ((namefile != "") && (countfile != "")) {
281                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
282             }
283             
284             if ((groupfile != "") && (countfile != "")) {
285                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
286             }
287                         
288                         if ((countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
289                         
290             if (countfile == "") {
291                 if ((fastafile != "") && (namefile == "")) {
292                     vector<string> files; files.push_back(fastafile);
293                     parser.getNameFile(files);
294                 }
295             }
296                 }
297
298         }
299         catch(exception& e) {
300                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
301                 exit(1);
302         }
303 }
304 //**********************************************************************************************************************
305
306 int RemoveSeqsCommand::execute(){
307         try {
308                 
309                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
310                 
311                 //get names you want to keep
312                 names = m->readAccnos(accnosfile);
313                 
314                 if (m->control_pressed) { return 0; }
315         
316         if (countfile != "") {
317             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
318                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
319             }
320         }
321                 
322                 //read through the correct file and output lines you want to keep
323                 if (namefile != "")                     {               readName();             }
324                 if (fastafile != "")            {               readFasta();    }
325                 if (groupfile != "")            {               readGroup();    }
326                 if (alignfile != "")            {               readAlign();    }
327                 if (listfile != "")                     {               readList();             }
328                 if (taxfile != "")                      {               readTax();              }
329                 if (qualfile != "")                     {               readQual();             }
330         if (countfile != "")            {               readCount();            }
331                 
332                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
333         
334                 if (outputNames.size() != 0) {
335                         m->mothurOutEndLine();
336                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
337                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
338                         m->mothurOutEndLine();
339                         
340                         //set fasta file as new current fastafile
341                         string current = "";
342                         itTypes = outputTypes.find("fasta");
343                         if (itTypes != outputTypes.end()) {
344                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
345                         }
346                         
347                         itTypes = outputTypes.find("name");
348                         if (itTypes != outputTypes.end()) {
349                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
350                         }
351                         
352                         itTypes = outputTypes.find("group");
353                         if (itTypes != outputTypes.end()) {
354                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
355                         }
356                         
357                         itTypes = outputTypes.find("list");
358                         if (itTypes != outputTypes.end()) {
359                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
360                         }
361                         
362                         itTypes = outputTypes.find("taxonomy");
363                         if (itTypes != outputTypes.end()) {
364                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
365                         }
366                         
367                         itTypes = outputTypes.find("qfile");
368                         if (itTypes != outputTypes.end()) {
369                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
370                         }       
371             
372             itTypes = outputTypes.find("count");
373                         if (itTypes != outputTypes.end()) {
374                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
375                         }
376                 }
377                 
378                 return 0;               
379         }
380
381         catch(exception& e) {
382                 m->errorOut(e, "RemoveSeqsCommand", "execute");
383                 exit(1);
384         }
385 }
386
387 //**********************************************************************************************************************
388 int RemoveSeqsCommand::readFasta(){
389         try {
390                 string thisOutputDir = outputDir;
391                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
392                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
393                 
394                 ofstream out;
395                 m->openOutputFile(outputFileName, out);
396                 
397                 ifstream in;
398                 m->openInputFile(fastafile, in);
399                 string name;
400                 
401                 bool wroteSomething = false;
402                 int removedCount = 0;
403                 
404                 while(!in.eof()){
405                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
406                         
407                         Sequence currSeq(in);
408                         name = currSeq.getName();
409                         
410                         if (name != "") {
411                                 //if this name is in the accnos file
412                                 if (names.count(name) == 0) {
413                                         wroteSomething = true;
414                                         
415                                         currSeq.printSequence(out);
416                                 }else {  removedCount++;  }
417                         }
418                         m->gobble(in);
419                 }
420                 in.close();     
421                 out.close();
422                 
423                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
424                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
425                 
426                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
427                 
428                 return 0;
429                 
430         }
431         catch(exception& e) {
432                 m->errorOut(e, "RemoveSeqsCommand", "readFasta");
433                 exit(1);
434         }
435 }
436 //**********************************************************************************************************************
437 int RemoveSeqsCommand::readQual(){
438         try {
439                 string thisOutputDir = outputDir;
440                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
441                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
442                 ofstream out;
443                 m->openOutputFile(outputFileName, out);
444                 
445                 
446                 ifstream in;
447                 m->openInputFile(qualfile, in);
448                 string name;
449                 
450                 bool wroteSomething = false;
451                 int removedCount = 0;
452                 
453                 
454                 while(!in.eof()){       
455                         string saveName = "";
456                         string name = "";
457                         string scores = "";
458                         
459                         in >> name; 
460                         
461                         if (name.length() != 0) { 
462                                 saveName = name.substr(1);
463                                 while (!in.eof())       {       
464                                         char c = in.get(); 
465                                         if (c == 10 || c == 13){        break;  }
466                                         else { name += c; }     
467                                 } 
468                                 m->gobble(in);
469                         }
470                         
471                         while(in){
472                                 char letter= in.get();
473                                 if(letter == '>'){      in.putback(letter);     break;  }
474                                 else{ scores += letter; }
475                         }
476                         
477                         m->gobble(in);
478                         
479                         if (names.count(saveName) == 0) {
480                                 wroteSomething = true;
481                                 
482                                 out << name << endl << scores;
483                         }else {  removedCount++;  }
484                         
485                         m->gobble(in);
486                 }
487                 in.close();
488                 out.close();
489                 
490                 
491                 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
492                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
493                 
494                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your quality file."); m->mothurOutEndLine();
495                 
496                 return 0;
497                 
498         }
499         catch(exception& e) {
500                 m->errorOut(e, "RemoveSeqsCommand", "readQual");
501                 exit(1);
502         }
503 }
504 //**********************************************************************************************************************
505 int RemoveSeqsCommand::readCount(){
506         try {
507         
508                 string thisOutputDir = outputDir;
509                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
510                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
511                 
512                 ofstream out;
513                 m->openOutputFile(outputFileName, out);
514                 
515                 ifstream in;
516                 m->openInputFile(countfile, in);
517                 
518                 bool wroteSomething = false;
519                 int removedCount = 0;
520                 
521         string headers = m->getline(in); m->gobble(in);
522         out << headers << endl;
523         
524         string name, rest; int thisTotal;
525         while (!in.eof()) {
526             
527             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
528             
529             in >> name; m->gobble(in); 
530             in >> thisTotal; m->gobble(in);
531             rest = m->getline(in); m->gobble(in);
532             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
533             
534             if (names.count(name) == 0) {
535                 out << name << '\t' << thisTotal << '\t' << rest << endl;
536                 wroteSomething = true;
537             }else { removedCount += thisTotal; }
538         }
539         in.close();
540                 out.close();
541                 
542                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
543                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
544                 
545                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
546         
547                 return 0;
548         }
549         catch(exception& e) {
550                 m->errorOut(e, "RemoveSeqsCommand", "readCount");
551                 exit(1);
552         }
553 }
554 //**********************************************************************************************************************
555 int RemoveSeqsCommand::readList(){
556         try {
557                 string thisOutputDir = outputDir;
558                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
559                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);            
560                 ofstream out;
561                 m->openOutputFile(outputFileName, out);
562                 
563                 ifstream in;
564                 m->openInputFile(listfile, in);
565                 
566                 bool wroteSomething = false;
567                 int removedCount = 0;
568                 
569                 while(!in.eof()){
570                         
571                         removedCount = 0;
572                         
573                         //read in list vector
574                         ListVector list(in);
575                         
576                         //make a new list vector
577                         ListVector newList;
578                         newList.setLabel(list.getLabel());
579                         
580                         //for each bin
581                         for (int i = 0; i < list.getNumBins(); i++) {
582                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
583                         
584                                 //parse out names that are in accnos file
585                                 string binnames = list.get(i);
586                                 
587                                 string newNames = "";
588                                 while (binnames.find_first_of(',') != -1) { 
589                                         string name = binnames.substr(0,binnames.find_first_of(','));
590                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
591                                         
592                                         //if that name is in the .accnos file, add it
593                                         if (names.count(name) == 0) {  newNames += name + ",";  }
594                                         else {  removedCount++;  }
595                                 }
596                         
597                                 //get last name
598                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
599                                 else {  removedCount++;  }
600
601                                 //if there are names in this bin add to new list
602                                 if (newNames != "") {  
603                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
604                                         newList.push_back(newNames);    
605                                 }
606                         }
607                                 
608                         //print new listvector
609                         if (newList.getNumBins() != 0) {
610                                 wroteSomething = true;
611                                 newList.print(out);
612                         }
613                         
614                         m->gobble(in);
615                 }
616                 in.close();     
617                 out.close();
618                 
619                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
620                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
621                 
622                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
623                 
624                 return 0;
625
626         }
627         catch(exception& e) {
628                 m->errorOut(e, "RemoveSeqsCommand", "readList");
629                 exit(1);
630         }
631 }
632 //**********************************************************************************************************************
633 int RemoveSeqsCommand::readName(){
634         try {
635                 string thisOutputDir = outputDir;
636                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
637                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
638                 ofstream out;
639                 m->openOutputFile(outputFileName, out);
640
641                 ifstream in;
642                 m->openInputFile(namefile, in);
643                 string name, firstCol, secondCol;
644                 
645                 bool wroteSomething = false;
646                 int removedCount = 0;
647                 
648                 while(!in.eof()){
649                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
650                         
651                         in >> firstCol;         m->gobble(in);          
652                         in >> secondCol;                        
653                         
654                         vector<string> parsedNames;
655                         m->splitAtComma(secondCol, parsedNames);
656                         
657                         vector<string> validSecond;  validSecond.clear();
658                         for (int i = 0; i < parsedNames.size(); i++) {
659                                 if (names.count(parsedNames[i]) == 0) {
660                                         validSecond.push_back(parsedNames[i]);
661                                 }
662                         }
663                         
664                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
665                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
666                                 removedCount += parsedNames.size();
667                         }else {
668                                 removedCount += parsedNames.size()-validSecond.size();
669                                 //if the name in the first column is in the set then print it and any other names in second column also in set
670                                 if (names.count(firstCol) == 0) {
671                                         
672                                         wroteSomething = true;
673                                         
674                                         out << firstCol << '\t';
675                                         
676                                         //you know you have at least one valid second since first column is valid
677                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
678                                         out << validSecond[validSecond.size()-1] << endl;
679                                         
680                                         //make first name in set you come to first column and then add the remaining names to second column
681                                 }else {
682                                         
683                                         //you want part of this row
684                                         if (validSecond.size() != 0) {
685                                                 
686                                                 wroteSomething = true;
687                                                 
688                                                 out << validSecond[0] << '\t';
689                                                 
690                                                 //you know you have at least one valid second since first column is valid
691                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
692                                                 out << validSecond[validSecond.size()-1] << endl;
693                                         }
694                                 }
695                         }
696                         m->gobble(in);
697                 }
698                 in.close();
699                 out.close();
700                 
701                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
702                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
703                 
704                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
705                 
706                 return 0;
707         }
708         catch(exception& e) {
709                 m->errorOut(e, "RemoveSeqsCommand", "readName");
710                 exit(1);
711         }
712 }
713
714 //**********************************************************************************************************************
715 int RemoveSeqsCommand::readGroup(){
716         try {
717                 string thisOutputDir = outputDir;
718                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
719                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);         
720                 ofstream out;
721                 m->openOutputFile(outputFileName, out);
722
723                 ifstream in;
724                 m->openInputFile(groupfile, in);
725                 string name, group;
726                 
727                 bool wroteSomething = false;
728                 int removedCount = 0;
729                 
730                 while(!in.eof()){
731                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
732                         
733                         in >> name;                             //read from first column
734                         in >> group;                    //read from second column
735                         
736                         //if this name is in the accnos file
737                         if (names.count(name) == 0) {
738                                 wroteSomething = true;
739                                 out << name << '\t' << group << endl;
740                         }else {  removedCount++;  }
741                                         
742                         m->gobble(in);
743                 }
744                 in.close();
745                 out.close();
746                 
747                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
748                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
749                 
750                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
751
752                 
753                 return 0;
754         }
755         catch(exception& e) {
756                 m->errorOut(e, "RemoveSeqsCommand", "readGroup");
757                 exit(1);
758         }
759 }
760 //**********************************************************************************************************************
761 int RemoveSeqsCommand::readTax(){
762         try {
763                 string thisOutputDir = outputDir;
764                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
765                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
766                 ofstream out;
767                 m->openOutputFile(outputFileName, out);
768
769                 ifstream in;
770                 m->openInputFile(taxfile, in);
771                 string name, tax;
772                 
773                 bool wroteSomething = false;
774                 int removedCount = 0;
775                 
776                 while(!in.eof()){
777                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
778                         
779                         in >> name;                             //read from first column
780                         in >> tax;                      //read from second column
781                         
782                         //if this name is in the accnos file
783                         if (names.count(name) == 0) {
784                                 wroteSomething = true;
785                                 out << name << '\t' << tax << endl;
786                         }else {  removedCount++;  }
787                                         
788                         m->gobble(in);
789                 }
790                 in.close();
791                 out.close();
792                 
793                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
794                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
795                 
796                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
797                 
798                 return 0;
799         }
800         catch(exception& e) {
801                 m->errorOut(e, "RemoveSeqsCommand", "readTax");
802                 exit(1);
803         }
804 }
805 //**********************************************************************************************************************
806 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
807 int RemoveSeqsCommand::readAlign(){
808         try {
809                 string thisOutputDir = outputDir;
810                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
811                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
812                 
813                 ofstream out;
814                 m->openOutputFile(outputFileName, out);
815
816                 ifstream in;
817                 m->openInputFile(alignfile, in);
818                 string name, junk;
819                 
820                 bool wroteSomething = false;
821                 int removedCount = 0;
822                 
823                 //read column headers
824                 for (int i = 0; i < 16; i++) {  
825                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
826                         else                    {       break;                  }
827                 }
828                 out << endl;
829                 
830                 while(!in.eof()){
831                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
832                         
833                         in >> name;                             //read from first column
834                         
835                         //if this name is in the accnos file
836                         if (names.count(name) == 0) {
837                                 wroteSomething = true;
838                                 
839                                 out << name << '\t';
840                                 
841                                 //read rest
842                                 for (int i = 0; i < 15; i++) {  
843                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
844                                         else                    {       break;                  }
845                                 }
846                                 out << endl;
847                                 
848                         }else {//still read just don't do anything with it
849                                 removedCount++;  
850                                 
851                                 //read rest
852                                 for (int i = 0; i < 15; i++) {  
853                                         if (!in.eof())  {       in >> junk;             }
854                                         else                    {       break;                  }
855                                 }
856                         }
857                         
858                         m->gobble(in);
859                 }
860                 in.close();
861                 out.close();
862                 
863                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
864                 outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName);
865                 
866                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
867
868                 
869                 return 0;
870                 
871         }
872         catch(exception& e) {
873                 m->errorOut(e, "RemoveSeqsCommand", "readAlign");
874                 exit(1);
875         }
876 }
877 //**********************************************************************************************************************
878
879