]> git.donarmstrong.com Git - mothur.git/blob - removeseqscommand.cpp
added citation function to commands
[mothur.git] / removeseqscommand.cpp
1 /*
2  *  removeseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "removeseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> RemoveSeqsCommand::setParameters(){      
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
24                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "RemoveSeqsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string RemoveSeqsCommand::getHelpString(){      
40         try {
41                 string helpString = "";
42                 helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
43                 helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
44                 helpString += "The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n";
45                 helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
46                 helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
47                 helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "RemoveSeqsCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57
58 //**********************************************************************************************************************
59 RemoveSeqsCommand::RemoveSeqsCommand(){ 
60         try {
61                 abort = true; calledHelp = true; 
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["fasta"] = tempOutNames;
65                 outputTypes["taxonomy"] = tempOutNames;
66                 outputTypes["name"] = tempOutNames;
67                 outputTypes["group"] = tempOutNames;
68                 outputTypes["alignreport"] = tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["qfile"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
79         try {
80                 abort = false; calledHelp = false;   
81                 
82                 //allow user to run help
83                 if(option == "help") { help(); abort = true; calledHelp = true; }
84                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
85                 
86                 else {
87                         vector<string> myArray = setParameters();
88                         
89                         OptionParser parser(option);
90                         map<string,string> parameters = parser.getParameters();
91                         
92                         ValidParameters validParameter;
93                         map<string,string>::iterator it;
94                         
95                         //check to make sure all parameters are valid for command
96                         for (it = parameters.begin(); it != parameters.end(); it++) { 
97                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
98                         }
99                         
100                         //initialize outputTypes
101                         vector<string> tempOutNames;
102                         outputTypes["fasta"] = tempOutNames;
103                         outputTypes["taxonomy"] = tempOutNames;
104                         outputTypes["name"] = tempOutNames;
105                         outputTypes["group"] = tempOutNames;
106                         outputTypes["alignreport"] = tempOutNames;
107                         outputTypes["list"] = tempOutNames;
108                         outputTypes["qfile"] = tempOutNames;
109                         
110                         //if the user changes the output directory command factory will send this info to us in the output parameter 
111                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
112                         
113                         //if the user changes the input directory command factory will send this info to us in the output parameter 
114                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
115                         if (inputDir == "not found"){   inputDir = "";          }
116                         else {
117                                 string path;
118                                 it = parameters.find("alignreport");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
124                                 }
125                                 
126                                 it = parameters.find("fasta");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
132                                 }
133                                 
134                                 it = parameters.find("accnos");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
140                                 }
141                                 
142                                 it = parameters.find("list");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
148                                 }
149                                 
150                                 it = parameters.find("name");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
156                                 }
157                                 
158                                 it = parameters.find("group");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
164                                 }
165                                 
166                                 it = parameters.find("taxonomy");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
172                                 }
173                                 
174                                 it = parameters.find("qfile");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
180                                 }
181                         }
182
183                         
184                         //check for required parameters
185                         accnosfile = validParameter.validFile(parameters, "accnos", true);
186                         if (accnosfile == "not open") { abort = true; }
187                         else if (accnosfile == "not found") {  
188                                 accnosfile = m->getAccnosFile(); 
189                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
190                                 else { 
191                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
192                                         abort = true;
193                                 }  
194                         }       
195                         
196                         fastafile = validParameter.validFile(parameters, "fasta", true);
197                         if (fastafile == "not open") { abort = true; }
198                         else if (fastafile == "not found") {  fastafile = "";  }        
199                         
200                         namefile = validParameter.validFile(parameters, "name", true);
201                         if (namefile == "not open") { abort = true; }
202                         else if (namefile == "not found") {  namefile = "";  }  
203                         
204                         groupfile = validParameter.validFile(parameters, "group", true);
205                         if (groupfile == "not open") { abort = true; }
206                         else if (groupfile == "not found") {  groupfile = "";  }        
207                         
208                         alignfile = validParameter.validFile(parameters, "alignreport", true);
209                         if (alignfile == "not open") { abort = true; }
210                         else if (alignfile == "not found") {  alignfile = "";  }
211                         
212                         listfile = validParameter.validFile(parameters, "list", true);
213                         if (listfile == "not open") { abort = true; }
214                         else if (listfile == "not found") {  listfile = "";  }
215                         
216                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
217                         if (taxfile == "not open") { abort = true; }
218                         else if (taxfile == "not found") {  taxfile = "";  }
219                         
220                         qualfile = validParameter.validFile(parameters, "qfile", true);
221                         if (qualfile == "not open") { abort = true; }
222                         else if (qualfile == "not found") {  qualfile = "";  }                  
223
224                         
225                         string usedDups = "true";
226                         string temp = validParameter.validFile(parameters, "dups", false);      
227                         if (temp == "not found") { 
228                                 if (namefile != "") {  temp = "true";                                   }
229                                 else                            {  temp = "false"; usedDups = "";       }
230                         }
231                         dups = m->isTrue(temp);
232                         
233                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
234                         
235                 }
236
237         }
238         catch(exception& e) {
239                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
240                 exit(1);
241         }
242 }
243 //**********************************************************************************************************************
244
245 int RemoveSeqsCommand::execute(){
246         try {
247                 
248                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
249                 
250                 //get names you want to keep
251                 readAccnos();
252                 
253                 if (m->control_pressed) { return 0; }
254                 
255                 //read through the correct file and output lines you want to keep
256                 if (namefile != "")                     {               readName();             }
257                 if (fastafile != "")            {               readFasta();    }
258                 if (groupfile != "")            {               readGroup();    }
259                 if (alignfile != "")            {               readAlign();    }
260                 if (listfile != "")                     {               readList();             }
261                 if (taxfile != "")                      {               readTax();              }
262                 if (qualfile != "")                     {               readQual();             }
263                 
264                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
265                 
266                 m->mothurOut("Removed " + toString(names.size()) + " sequences."); m->mothurOutEndLine();
267                 
268                 if (outputNames.size() != 0) {
269                         m->mothurOutEndLine();
270                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
271                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
272                         m->mothurOutEndLine();
273                         
274                         //set fasta file as new current fastafile
275                         string current = "";
276                         itTypes = outputTypes.find("fasta");
277                         if (itTypes != outputTypes.end()) {
278                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
279                         }
280                         
281                         itTypes = outputTypes.find("name");
282                         if (itTypes != outputTypes.end()) {
283                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
284                         }
285                         
286                         itTypes = outputTypes.find("group");
287                         if (itTypes != outputTypes.end()) {
288                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
289                         }
290                         
291                         itTypes = outputTypes.find("list");
292                         if (itTypes != outputTypes.end()) {
293                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
294                         }
295                         
296                         itTypes = outputTypes.find("taxonomy");
297                         if (itTypes != outputTypes.end()) {
298                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
299                         }
300                         
301                         itTypes = outputTypes.find("qfile");
302                         if (itTypes != outputTypes.end()) {
303                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
304                         }                       
305                 }
306                 
307                 return 0;               
308         }
309
310         catch(exception& e) {
311                 m->errorOut(e, "RemoveSeqsCommand", "execute");
312                 exit(1);
313         }
314 }
315
316 //**********************************************************************************************************************
317 int RemoveSeqsCommand::readFasta(){
318         try {
319                 string thisOutputDir = outputDir;
320                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
321                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
322                 
323                 ofstream out;
324                 m->openOutputFile(outputFileName, out);
325                 
326                 ifstream in;
327                 m->openInputFile(fastafile, in);
328                 string name;
329                 
330                 bool wroteSomething = false;
331                 
332                 while(!in.eof()){
333                         if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
334                         
335                         Sequence currSeq(in);
336                         name = currSeq.getName();
337                         
338                         if (name != "") {
339                                 //if this name is in the accnos file
340                                 if (names.count(name) == 0) {
341                                         wroteSomething = true;
342                                         
343                                         currSeq.printSequence(out);
344                                 }
345                         }
346                         m->gobble(in);
347                 }
348                 in.close();     
349                 out.close();
350                 
351                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
352                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
353                 
354                 return 0;
355                 
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "RemoveSeqsCommand", "readFasta");
359                 exit(1);
360         }
361 }
362 //**********************************************************************************************************************
363 int RemoveSeqsCommand::readQual(){
364         try {
365                 string thisOutputDir = outputDir;
366                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
367                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
368                 ofstream out;
369                 m->openOutputFile(outputFileName, out);
370                 
371                 
372                 ifstream in;
373                 m->openInputFile(qualfile, in);
374                 string name;
375                 
376                 bool wroteSomething = false;
377                 
378                 
379                 while(!in.eof()){       
380                         string saveName = "";
381                         string name = "";
382                         string scores = "";
383                         
384                         in >> name; 
385                         
386                         if (name.length() != 0) { 
387                                 saveName = name.substr(1);
388                                 while (!in.eof())       {       
389                                         char c = in.get(); 
390                                         if (c == 10 || c == 13){        break;  }
391                                         else { name += c; }     
392                                 } 
393                                 m->gobble(in);
394                         }
395                         
396                         while(in){
397                                 char letter= in.get();
398                                 if(letter == '>'){      in.putback(letter);     break;  }
399                                 else{ scores += letter; }
400                         }
401                         
402                         m->gobble(in);
403                         
404                         if (names.count(saveName) == 0) {
405                                 wroteSomething = true;
406                                 
407                                 out << name << endl << scores;
408                         }
409                         
410                         m->gobble(in);
411                 }
412                 in.close();
413                 out.close();
414                 
415                 
416                 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
417                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
418                 
419                 return 0;
420                 
421         }
422         catch(exception& e) {
423                 m->errorOut(e, "RemoveSeqsCommand", "readQual");
424                 exit(1);
425         }
426 }
427 //**********************************************************************************************************************
428 int RemoveSeqsCommand::readList(){
429         try {
430                 string thisOutputDir = outputDir;
431                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
432                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
433                 
434                 ofstream out;
435                 m->openOutputFile(outputFileName, out);
436                 
437                 ifstream in;
438                 m->openInputFile(listfile, in);
439                 
440                 bool wroteSomething = false;
441                 
442                 while(!in.eof()){
443                         //read in list vector
444                         ListVector list(in);
445                         
446                         //make a new list vector
447                         ListVector newList;
448                         newList.setLabel(list.getLabel());
449                         
450                         //for each bin
451                         for (int i = 0; i < list.getNumBins(); i++) {
452                                 if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
453                         
454                                 //parse out names that are in accnos file
455                                 string binnames = list.get(i);
456                                 
457                                 string newNames = "";
458                                 while (binnames.find_first_of(',') != -1) { 
459                                         string name = binnames.substr(0,binnames.find_first_of(','));
460                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
461                                         
462                                         //if that name is in the .accnos file, add it
463                                         if (names.count(name) == 0) {  newNames += name + ",";  }
464                                 }
465                         
466                                 //get last name
467                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
468
469                                 //if there are names in this bin add to new list
470                                 if (newNames != "") {  
471                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
472                                         newList.push_back(newNames);    
473                                 }
474                         }
475                                 
476                         //print new listvector
477                         if (newList.getNumBins() != 0) {
478                                 wroteSomething = true;
479                                 newList.print(out);
480                         }
481                         
482                         m->gobble(in);
483                 }
484                 in.close();     
485                 out.close();
486                 
487                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
488                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
489                                 
490                 return 0;
491
492         }
493         catch(exception& e) {
494                 m->errorOut(e, "RemoveSeqsCommand", "readList");
495                 exit(1);
496         }
497 }
498 //**********************************************************************************************************************
499 int RemoveSeqsCommand::readName(){
500         try {
501                 string thisOutputDir = outputDir;
502                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
503                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
504
505                 ofstream out;
506                 m->openOutputFile(outputFileName, out);
507
508                 ifstream in;
509                 m->openInputFile(namefile, in);
510                 string name, firstCol, secondCol;
511                 
512                 bool wroteSomething = false;
513                                 
514                 
515                 while(!in.eof()){
516                         if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
517                         
518                         in >> firstCol;         m->gobble(in);          
519                         in >> secondCol;                        
520                         
521                         vector<string> parsedNames;
522                         m->splitAtComma(secondCol, parsedNames);
523                         
524                         vector<string> validSecond;  validSecond.clear();
525                         for (int i = 0; i < parsedNames.size(); i++) {
526                                 if (names.count(parsedNames[i]) == 0) {
527                                         validSecond.push_back(parsedNames[i]);
528                                 }
529                         }
530                         
531                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
532                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
533                         }else {
534                                 //if the name in the first column is in the set then print it and any other names in second column also in set
535                                 if (names.count(firstCol) == 0) {
536                                         
537                                         wroteSomething = true;
538                                         
539                                         out << firstCol << '\t';
540                                         
541                                         //you know you have at least one valid second since first column is valid
542                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
543                                         out << validSecond[validSecond.size()-1] << endl;
544                                         
545                                         //make first name in set you come to first column and then add the remaining names to second column
546                                 }else {
547                                         
548                                         //you want part of this row
549                                         if (validSecond.size() != 0) {
550                                                 
551                                                 wroteSomething = true;
552                                                 
553                                                 out << validSecond[0] << '\t';
554                                                 
555                                                 //you know you have at least one valid second since first column is valid
556                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
557                                                 out << validSecond[validSecond.size()-1] << endl;
558                                         }
559                                 }
560                         }
561                         m->gobble(in);
562                 }
563                 in.close();
564                 out.close();
565                 
566                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
567                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
568                 
569                 return 0;
570         }
571         catch(exception& e) {
572                 m->errorOut(e, "RemoveSeqsCommand", "readName");
573                 exit(1);
574         }
575 }
576
577 //**********************************************************************************************************************
578 int RemoveSeqsCommand::readGroup(){
579         try {
580                 string thisOutputDir = outputDir;
581                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
582                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
583                 
584                 ofstream out;
585                 m->openOutputFile(outputFileName, out);
586
587                 ifstream in;
588                 m->openInputFile(groupfile, in);
589                 string name, group;
590                 
591                 bool wroteSomething = false;
592                 
593                 while(!in.eof()){
594                         if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
595                         
596                         in >> name;                             //read from first column
597                         in >> group;                    //read from second column
598                         
599                         //if this name is in the accnos file
600                         if (names.count(name) == 0) {
601                                 wroteSomething = true;
602                                 out << name << '\t' << group << endl;
603                         }
604                                         
605                         m->gobble(in);
606                 }
607                 in.close();
608                 out.close();
609                 
610                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
611                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
612                 
613                 return 0;
614         }
615         catch(exception& e) {
616                 m->errorOut(e, "RemoveSeqsCommand", "readGroup");
617                 exit(1);
618         }
619 }
620 //**********************************************************************************************************************
621 int RemoveSeqsCommand::readTax(){
622         try {
623                 string thisOutputDir = outputDir;
624                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
625                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
626                 ofstream out;
627                 m->openOutputFile(outputFileName, out);
628
629                 ifstream in;
630                 m->openInputFile(taxfile, in);
631                 string name, tax;
632                 
633                 bool wroteSomething = false;
634                 
635                 while(!in.eof()){
636                         if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
637                         
638                         in >> name;                             //read from first column
639                         in >> tax;                      //read from second column
640                         
641                         //if this name is in the accnos file
642                         if (names.count(name) == 0) {
643                                 wroteSomething = true;
644                                 out << name << '\t' << tax << endl;
645                         }
646                                         
647                         m->gobble(in);
648                 }
649                 in.close();
650                 out.close();
651                 
652                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
653                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
654                 
655                 return 0;
656         }
657         catch(exception& e) {
658                 m->errorOut(e, "RemoveSeqsCommand", "readTax");
659                 exit(1);
660         }
661 }
662 //**********************************************************************************************************************
663 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
664 int RemoveSeqsCommand::readAlign(){
665         try {
666                 string thisOutputDir = outputDir;
667                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
668                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
669                 
670                 ofstream out;
671                 m->openOutputFile(outputFileName, out);
672
673                 ifstream in;
674                 m->openInputFile(alignfile, in);
675                 string name, junk;
676                 
677                 bool wroteSomething = false;
678                 
679                 //read column headers
680                 for (int i = 0; i < 16; i++) {  
681                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
682                         else                    {       break;                  }
683                 }
684                 out << endl;
685                 
686                 while(!in.eof()){
687                         if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
688                         
689                         in >> name;                             //read from first column
690                         
691                         //if this name is in the accnos file
692                         if (names.count(name) == 0) {
693                                 wroteSomething = true;
694                                 
695                                 out << name << '\t';
696                                 
697                                 //read rest
698                                 for (int i = 0; i < 15; i++) {  
699                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
700                                         else                    {       break;                  }
701                                 }
702                                 out << endl;
703                                 
704                         }else {//still read just don't do anything with it
705                                 
706                                 //read rest
707                                 for (int i = 0; i < 15; i++) {  
708                                         if (!in.eof())  {       in >> junk;             }
709                                         else                    {       break;                  }
710                                 }
711                         }
712                         
713                         m->gobble(in);
714                 }
715                 in.close();
716                 out.close();
717                 
718                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
719                 outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName);
720                 
721                 return 0;
722                 
723         }
724         catch(exception& e) {
725                 m->errorOut(e, "RemoveSeqsCommand", "readAlign");
726                 exit(1);
727         }
728 }
729 //**********************************************************************************************************************
730 void RemoveSeqsCommand::readAccnos(){
731         try {
732                 
733                 ifstream in;
734                 m->openInputFile(accnosfile, in);
735                 string name;
736                 
737                 while(!in.eof()){
738                         in >> name;
739                                                 
740                         names.insert(name);
741                         
742                         m->gobble(in);
743                 }
744                 in.close();             
745
746         }
747         catch(exception& e) {
748                 m->errorOut(e, "RemoveSeqsCommand", "readAccnos");
749                 exit(1);
750         }
751 }
752
753 //**********************************************************************************************************************
754
755