]> git.donarmstrong.com Git - mothur.git/blob - removeseqscommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / removeseqscommand.cpp
1 /*
2  *  removeseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "removeseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> RemoveSeqsCommand::setParameters(){      
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
24                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "RemoveSeqsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string RemoveSeqsCommand::getHelpString(){      
40         try {
41                 string helpString = "";
42                 helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
43                 helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
44                 helpString += "The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n";
45                 helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
46                 helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
47                 helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "RemoveSeqsCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 string RemoveSeqsCommand::getOutputFileNameTag(string type, string inputName=""){       
58         try {
59         string outputFileName = "";
60                 map<string, vector<string> >::iterator it;
61         
62         //is this a type this command creates
63         it = outputTypes.find(type);
64         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
65         else {
66             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
67             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
68             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
69             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
70             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
71             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
72             else if (type == "alignreport") {   outputFileName =  "pick.align.report";                   }
73             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
74         }
75         return outputFileName;
76         }
77         catch(exception& e) {
78                 m->errorOut(e, "RemoveSeqsCommand", "getOutputFileNameTag");
79                 exit(1);
80         }
81 }
82
83 //**********************************************************************************************************************
84 RemoveSeqsCommand::RemoveSeqsCommand(){ 
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["fasta"] = tempOutNames;
90                 outputTypes["taxonomy"] = tempOutNames;
91                 outputTypes["name"] = tempOutNames;
92                 outputTypes["group"] = tempOutNames;
93                 outputTypes["alignreport"] = tempOutNames;
94                 outputTypes["list"] = tempOutNames;
95                 outputTypes["qfile"] = tempOutNames;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103 RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
104         try {
105                 abort = false; calledHelp = false;   
106                 
107                 //allow user to run help
108                 if(option == "help") { help(); abort = true; calledHelp = true; }
109                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110                 
111                 else {
112                         vector<string> myArray = setParameters();
113                         
114                         OptionParser parser(option);
115                         map<string,string> parameters = parser.getParameters();
116                         
117                         ValidParameters validParameter;
118                         map<string,string>::iterator it;
119                         
120                         //check to make sure all parameters are valid for command
121                         for (it = parameters.begin(); it != parameters.end(); it++) { 
122                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
123                         }
124                         
125                         //initialize outputTypes
126                         vector<string> tempOutNames;
127                         outputTypes["fasta"] = tempOutNames;
128                         outputTypes["taxonomy"] = tempOutNames;
129                         outputTypes["name"] = tempOutNames;
130                         outputTypes["group"] = tempOutNames;
131                         outputTypes["alignreport"] = tempOutNames;
132                         outputTypes["list"] = tempOutNames;
133                         outputTypes["qfile"] = tempOutNames;
134                         
135                         //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
137                         
138                         //if the user changes the input directory command factory will send this info to us in the output parameter 
139                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
140                         if (inputDir == "not found"){   inputDir = "";          }
141                         else {
142                                 string path;
143                                 it = parameters.find("alignreport");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
149                                 }
150                                 
151                                 it = parameters.find("fasta");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
157                                 }
158                                 
159                                 it = parameters.find("accnos");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
165                                 }
166                                 
167                                 it = parameters.find("list");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
173                                 }
174                                 
175                                 it = parameters.find("name");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
181                                 }
182                                 
183                                 it = parameters.find("group");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
189                                 }
190                                 
191                                 it = parameters.find("taxonomy");
192                                 //user has given a template file
193                                 if(it != parameters.end()){ 
194                                         path = m->hasPath(it->second);
195                                         //if the user has not given a path then, add inputdir. else leave path alone.
196                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
197                                 }
198                                 
199                                 it = parameters.find("qfile");
200                                 //user has given a template file
201                                 if(it != parameters.end()){ 
202                                         path = m->hasPath(it->second);
203                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
205                                 }
206                         }
207
208                         
209                         //check for required parameters
210                         accnosfile = validParameter.validFile(parameters, "accnos", true);
211                         if (accnosfile == "not open") { abort = true; }
212                         else if (accnosfile == "not found") {  
213                                 accnosfile = m->getAccnosFile(); 
214                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
215                                 else { 
216                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
217                                         abort = true;
218                                 }  
219                         }else { m->setAccnosFile(accnosfile); } 
220                         
221                         fastafile = validParameter.validFile(parameters, "fasta", true);
222                         if (fastafile == "not open") { fastafile = ""; abort = true; }
223                         else if (fastafile == "not found") {  fastafile = "";  }        
224                         else { m->setFastaFile(fastafile); }
225                                                                    
226                         namefile = validParameter.validFile(parameters, "name", true);
227                         if (namefile == "not open") { namefile = ""; abort = true; }
228                         else if (namefile == "not found") {  namefile = "";  }  
229                         else { m->setNameFile(namefile); } 
230                                                                    
231                         groupfile = validParameter.validFile(parameters, "group", true);
232                         if (groupfile == "not open") { abort = true; }
233                         else if (groupfile == "not found") {  groupfile = "";  }
234                         else { m->setGroupFile(groupfile); }
235                         
236                         alignfile = validParameter.validFile(parameters, "alignreport", true);
237                         if (alignfile == "not open") { abort = true; }
238                         else if (alignfile == "not found") {  alignfile = "";  }
239                         
240                         listfile = validParameter.validFile(parameters, "list", true);
241                         if (listfile == "not open") { abort = true; }
242                         else if (listfile == "not found") {  listfile = "";  }
243                         else { m->setListFile(listfile); }
244                         
245                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
246                         if (taxfile == "not open") { abort = true; }
247                         else if (taxfile == "not found") {  taxfile = "";  }
248                         else { m->setTaxonomyFile(taxfile); }
249                         
250                         qualfile = validParameter.validFile(parameters, "qfile", true);
251                         if (qualfile == "not open") { abort = true; }
252                         else if (qualfile == "not found") {  qualfile = "";  }                  
253                         else { m->setQualFile(qualfile); }
254                         
255                         string usedDups = "true";
256                         string temp = validParameter.validFile(parameters, "dups", false);      
257                         if (temp == "not found") { 
258                                 if (namefile != "") {  temp = "true";                                   }
259                                 else                            {  temp = "false"; usedDups = "";       }
260                         }
261                         dups = m->isTrue(temp);
262                         
263                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
264                         
265                         if ((fastafile != "") && (namefile == "")) {
266                                 vector<string> files; files.push_back(fastafile);
267                                 parser.getNameFile(files);
268                         }
269                 }
270
271         }
272         catch(exception& e) {
273                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
274                 exit(1);
275         }
276 }
277 //**********************************************************************************************************************
278
279 int RemoveSeqsCommand::execute(){
280         try {
281                 
282                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
283                 
284                 //get names you want to keep
285                 names = m->readAccnos(accnosfile);
286                 
287                 if (m->control_pressed) { return 0; }
288                 
289                 //read through the correct file and output lines you want to keep
290                 if (namefile != "")                     {               readName();             }
291                 if (fastafile != "")            {               readFasta();    }
292                 if (groupfile != "")            {               readGroup();    }
293                 if (alignfile != "")            {               readAlign();    }
294                 if (listfile != "")                     {               readList();             }
295                 if (taxfile != "")                      {               readTax();              }
296                 if (qualfile != "")                     {               readQual();             }
297                 
298                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
299         
300                 if (outputNames.size() != 0) {
301                         m->mothurOutEndLine();
302                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
303                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
304                         m->mothurOutEndLine();
305                         
306                         //set fasta file as new current fastafile
307                         string current = "";
308                         itTypes = outputTypes.find("fasta");
309                         if (itTypes != outputTypes.end()) {
310                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
311                         }
312                         
313                         itTypes = outputTypes.find("name");
314                         if (itTypes != outputTypes.end()) {
315                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
316                         }
317                         
318                         itTypes = outputTypes.find("group");
319                         if (itTypes != outputTypes.end()) {
320                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
321                         }
322                         
323                         itTypes = outputTypes.find("list");
324                         if (itTypes != outputTypes.end()) {
325                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
326                         }
327                         
328                         itTypes = outputTypes.find("taxonomy");
329                         if (itTypes != outputTypes.end()) {
330                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
331                         }
332                         
333                         itTypes = outputTypes.find("qfile");
334                         if (itTypes != outputTypes.end()) {
335                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
336                         }                       
337                 }
338                 
339                 return 0;               
340         }
341
342         catch(exception& e) {
343                 m->errorOut(e, "RemoveSeqsCommand", "execute");
344                 exit(1);
345         }
346 }
347
348 //**********************************************************************************************************************
349 int RemoveSeqsCommand::readFasta(){
350         try {
351                 string thisOutputDir = outputDir;
352                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
353                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
354                 
355                 ofstream out;
356                 m->openOutputFile(outputFileName, out);
357                 
358                 ifstream in;
359                 m->openInputFile(fastafile, in);
360                 string name;
361                 
362                 bool wroteSomething = false;
363                 int removedCount = 0;
364                 
365                 while(!in.eof()){
366                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
367                         
368                         Sequence currSeq(in);
369                         name = currSeq.getName();
370                         
371                         if (name != "") {
372                                 //if this name is in the accnos file
373                                 if (names.count(name) == 0) {
374                                         wroteSomething = true;
375                                         
376                                         currSeq.printSequence(out);
377                                 }else {  removedCount++;  }
378                         }
379                         m->gobble(in);
380                 }
381                 in.close();     
382                 out.close();
383                 
384                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
385                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
386                 
387                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
388                 
389                 return 0;
390                 
391         }
392         catch(exception& e) {
393                 m->errorOut(e, "RemoveSeqsCommand", "readFasta");
394                 exit(1);
395         }
396 }
397 //**********************************************************************************************************************
398 int RemoveSeqsCommand::readQual(){
399         try {
400                 string thisOutputDir = outputDir;
401                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
402                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
403                 ofstream out;
404                 m->openOutputFile(outputFileName, out);
405                 
406                 
407                 ifstream in;
408                 m->openInputFile(qualfile, in);
409                 string name;
410                 
411                 bool wroteSomething = false;
412                 int removedCount = 0;
413                 
414                 
415                 while(!in.eof()){       
416                         string saveName = "";
417                         string name = "";
418                         string scores = "";
419                         
420                         in >> name; 
421                         
422                         if (name.length() != 0) { 
423                                 saveName = name.substr(1);
424                                 while (!in.eof())       {       
425                                         char c = in.get(); 
426                                         if (c == 10 || c == 13){        break;  }
427                                         else { name += c; }     
428                                 } 
429                                 m->gobble(in);
430                         }
431                         
432                         while(in){
433                                 char letter= in.get();
434                                 if(letter == '>'){      in.putback(letter);     break;  }
435                                 else{ scores += letter; }
436                         }
437                         
438                         m->gobble(in);
439                         
440                         if (names.count(saveName) == 0) {
441                                 wroteSomething = true;
442                                 
443                                 out << name << endl << scores;
444                         }else {  removedCount++;  }
445                         
446                         m->gobble(in);
447                 }
448                 in.close();
449                 out.close();
450                 
451                 
452                 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
453                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
454                 
455                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your quality file."); m->mothurOutEndLine();
456                 
457                 return 0;
458                 
459         }
460         catch(exception& e) {
461                 m->errorOut(e, "RemoveSeqsCommand", "readQual");
462                 exit(1);
463         }
464 }
465 //**********************************************************************************************************************
466 int RemoveSeqsCommand::readList(){
467         try {
468                 string thisOutputDir = outputDir;
469                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
470                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);            
471                 ofstream out;
472                 m->openOutputFile(outputFileName, out);
473                 
474                 ifstream in;
475                 m->openInputFile(listfile, in);
476                 
477                 bool wroteSomething = false;
478                 int removedCount = 0;
479                 
480                 while(!in.eof()){
481                         
482                         removedCount = 0;
483                         
484                         //read in list vector
485                         ListVector list(in);
486                         
487                         //make a new list vector
488                         ListVector newList;
489                         newList.setLabel(list.getLabel());
490                         
491                         //for each bin
492                         for (int i = 0; i < list.getNumBins(); i++) {
493                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
494                         
495                                 //parse out names that are in accnos file
496                                 string binnames = list.get(i);
497                                 
498                                 string newNames = "";
499                                 while (binnames.find_first_of(',') != -1) { 
500                                         string name = binnames.substr(0,binnames.find_first_of(','));
501                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
502                                         
503                                         //if that name is in the .accnos file, add it
504                                         if (names.count(name) == 0) {  newNames += name + ",";  }
505                                         else {  removedCount++;  }
506                                 }
507                         
508                                 //get last name
509                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
510                                 else {  removedCount++;  }
511
512                                 //if there are names in this bin add to new list
513                                 if (newNames != "") {  
514                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
515                                         newList.push_back(newNames);    
516                                 }
517                         }
518                                 
519                         //print new listvector
520                         if (newList.getNumBins() != 0) {
521                                 wroteSomething = true;
522                                 newList.print(out);
523                         }
524                         
525                         m->gobble(in);
526                 }
527                 in.close();     
528                 out.close();
529                 
530                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
531                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
532                 
533                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
534                 
535                 return 0;
536
537         }
538         catch(exception& e) {
539                 m->errorOut(e, "RemoveSeqsCommand", "readList");
540                 exit(1);
541         }
542 }
543 //**********************************************************************************************************************
544 int RemoveSeqsCommand::readName(){
545         try {
546                 string thisOutputDir = outputDir;
547                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
548                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
549                 ofstream out;
550                 m->openOutputFile(outputFileName, out);
551
552                 ifstream in;
553                 m->openInputFile(namefile, in);
554                 string name, firstCol, secondCol;
555                 
556                 bool wroteSomething = false;
557                 int removedCount = 0;
558                 
559                 while(!in.eof()){
560                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
561                         
562                         in >> firstCol;         m->gobble(in);          
563                         in >> secondCol;                        
564                         
565                         vector<string> parsedNames;
566                         m->splitAtComma(secondCol, parsedNames);
567                         
568                         vector<string> validSecond;  validSecond.clear();
569                         for (int i = 0; i < parsedNames.size(); i++) {
570                                 if (names.count(parsedNames[i]) == 0) {
571                                         validSecond.push_back(parsedNames[i]);
572                                 }
573                         }
574                         
575                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
576                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
577                                 removedCount += parsedNames.size();
578                         }else {
579                                 removedCount += parsedNames.size()-validSecond.size();
580                                 //if the name in the first column is in the set then print it and any other names in second column also in set
581                                 if (names.count(firstCol) == 0) {
582                                         
583                                         wroteSomething = true;
584                                         
585                                         out << firstCol << '\t';
586                                         
587                                         //you know you have at least one valid second since first column is valid
588                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
589                                         out << validSecond[validSecond.size()-1] << endl;
590                                         
591                                         //make first name in set you come to first column and then add the remaining names to second column
592                                 }else {
593                                         
594                                         //you want part of this row
595                                         if (validSecond.size() != 0) {
596                                                 
597                                                 wroteSomething = true;
598                                                 
599                                                 out << validSecond[0] << '\t';
600                                                 
601                                                 //you know you have at least one valid second since first column is valid
602                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
603                                                 out << validSecond[validSecond.size()-1] << endl;
604                                         }
605                                 }
606                         }
607                         m->gobble(in);
608                 }
609                 in.close();
610                 out.close();
611                 
612                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
613                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
614                 
615                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
616                 
617                 return 0;
618         }
619         catch(exception& e) {
620                 m->errorOut(e, "RemoveSeqsCommand", "readName");
621                 exit(1);
622         }
623 }
624
625 //**********************************************************************************************************************
626 int RemoveSeqsCommand::readGroup(){
627         try {
628                 string thisOutputDir = outputDir;
629                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
630                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);         
631                 ofstream out;
632                 m->openOutputFile(outputFileName, out);
633
634                 ifstream in;
635                 m->openInputFile(groupfile, in);
636                 string name, group;
637                 
638                 bool wroteSomething = false;
639                 int removedCount = 0;
640                 
641                 while(!in.eof()){
642                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
643                         
644                         in >> name;                             //read from first column
645                         in >> group;                    //read from second column
646                         
647                         //if this name is in the accnos file
648                         if (names.count(name) == 0) {
649                                 wroteSomething = true;
650                                 out << name << '\t' << group << endl;
651                         }else {  removedCount++;  }
652                                         
653                         m->gobble(in);
654                 }
655                 in.close();
656                 out.close();
657                 
658                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
659                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
660                 
661                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
662
663                 
664                 return 0;
665         }
666         catch(exception& e) {
667                 m->errorOut(e, "RemoveSeqsCommand", "readGroup");
668                 exit(1);
669         }
670 }
671 //**********************************************************************************************************************
672 int RemoveSeqsCommand::readTax(){
673         try {
674                 string thisOutputDir = outputDir;
675                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
676                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
677                 ofstream out;
678                 m->openOutputFile(outputFileName, out);
679
680                 ifstream in;
681                 m->openInputFile(taxfile, in);
682                 string name, tax;
683                 
684                 bool wroteSomething = false;
685                 int removedCount = 0;
686                 
687                 while(!in.eof()){
688                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
689                         
690                         in >> name;                             //read from first column
691                         in >> tax;                      //read from second column
692                         
693                         //if this name is in the accnos file
694                         if (names.count(name) == 0) {
695                                 wroteSomething = true;
696                                 out << name << '\t' << tax << endl;
697                         }else {  removedCount++;  }
698                                         
699                         m->gobble(in);
700                 }
701                 in.close();
702                 out.close();
703                 
704                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
705                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
706                 
707                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
708                 
709                 return 0;
710         }
711         catch(exception& e) {
712                 m->errorOut(e, "RemoveSeqsCommand", "readTax");
713                 exit(1);
714         }
715 }
716 //**********************************************************************************************************************
717 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
718 int RemoveSeqsCommand::readAlign(){
719         try {
720                 string thisOutputDir = outputDir;
721                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
722                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
723                 
724                 ofstream out;
725                 m->openOutputFile(outputFileName, out);
726
727                 ifstream in;
728                 m->openInputFile(alignfile, in);
729                 string name, junk;
730                 
731                 bool wroteSomething = false;
732                 int removedCount = 0;
733                 
734                 //read column headers
735                 for (int i = 0; i < 16; i++) {  
736                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
737                         else                    {       break;                  }
738                 }
739                 out << endl;
740                 
741                 while(!in.eof()){
742                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
743                         
744                         in >> name;                             //read from first column
745                         
746                         //if this name is in the accnos file
747                         if (names.count(name) == 0) {
748                                 wroteSomething = true;
749                                 
750                                 out << name << '\t';
751                                 
752                                 //read rest
753                                 for (int i = 0; i < 15; i++) {  
754                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
755                                         else                    {       break;                  }
756                                 }
757                                 out << endl;
758                                 
759                         }else {//still read just don't do anything with it
760                                 removedCount++;  
761                                 
762                                 //read rest
763                                 for (int i = 0; i < 15; i++) {  
764                                         if (!in.eof())  {       in >> junk;             }
765                                         else                    {       break;                  }
766                                 }
767                         }
768                         
769                         m->gobble(in);
770                 }
771                 in.close();
772                 out.close();
773                 
774                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
775                 outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName);
776                 
777                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
778
779                 
780                 return 0;
781                 
782         }
783         catch(exception& e) {
784                 m->errorOut(e, "RemoveSeqsCommand", "readAlign");
785                 exit(1);
786         }
787 }
788 //**********************************************************************************************************************
789
790