]> git.donarmstrong.com Git - mothur.git/blob - removeseqscommand.cpp
added fastq to list.seqs, get.seqs and remove.seqs. fixed bug where venn command...
[mothur.git] / removeseqscommand.cpp
1 /*
2  *  removeseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "removeseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> RemoveSeqsCommand::setParameters(){      
17         try {
18         CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
20         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
24                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy);
25                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
26                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none","qfile",false,false); parameters.push_back(pqfile);
27                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
28                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "RemoveSeqsCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string RemoveSeqsCommand::getHelpString(){      
43         try {
44                 string helpString = "";
45                 helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
46                 helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
47                 helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport, fastq and dups.  You must provide accnos and at least one of the file parameters.\n";
48                 helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
49                 helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
50                 helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "RemoveSeqsCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string RemoveSeqsCommand::getOutputPattern(string type) {
61     try {
62         string pattern = "";
63         
64         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
65         else if (type == "fastq")       {   pattern = "[filename],pick,[extension]";    }
66         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
67         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
68         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
69         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
70         else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
71         else if (type == "qfile")       {   pattern = "[filename],pick,[extension]";    }
72         else if (type == "alignreport")      {   pattern = "[filename],pick.align.report";    }
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "GetSeqsCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 RemoveSeqsCommand::RemoveSeqsCommand(){ 
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["fasta"] = tempOutNames;
89         outputTypes["fastq"] = tempOutNames;
90                 outputTypes["taxonomy"] = tempOutNames;
91                 outputTypes["name"] = tempOutNames;
92                 outputTypes["group"] = tempOutNames;
93                 outputTypes["alignreport"] = tempOutNames;
94                 outputTypes["list"] = tempOutNames;
95                 outputTypes["qfile"] = tempOutNames;
96         outputTypes["count"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
105         try {
106                 abort = false; calledHelp = false;   
107                 
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         
118                         ValidParameters validParameter;
119                         map<string,string>::iterator it;
120                         
121                         //check to make sure all parameters are valid for command
122                         for (it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["fasta"] = tempOutNames;
129             outputTypes["fastq"] = tempOutNames;
130                         outputTypes["taxonomy"] = tempOutNames;
131                         outputTypes["name"] = tempOutNames;
132                         outputTypes["group"] = tempOutNames;
133                         outputTypes["alignreport"] = tempOutNames;
134                         outputTypes["list"] = tempOutNames;
135                         outputTypes["qfile"] = tempOutNames;
136             outputTypes["count"] = tempOutNames;
137                         
138                         //if the user changes the output directory command factory will send this info to us in the output parameter 
139                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
140                         
141                         //if the user changes the input directory command factory will send this info to us in the output parameter 
142                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
143                         if (inputDir == "not found"){   inputDir = "";          }
144                         else {
145                                 string path;
146                                 it = parameters.find("alignreport");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
152                                 }
153                                 
154                                 it = parameters.find("fasta");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
160                                 }
161                                 
162                                 it = parameters.find("accnos");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
168                                 }
169                                 
170                                 it = parameters.find("list");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
176                                 }
177                                 
178                                 it = parameters.find("name");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
184                                 }
185                                 
186                                 it = parameters.find("group");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
192                                 }
193                                 
194                                 it = parameters.find("taxonomy");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
200                                 }
201                                 
202                                 it = parameters.find("qfile");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
208                                 }
209                 
210                 it = parameters.find("count");
211                                 //user has given a template file
212                                 if(it != parameters.end()){ 
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
216                                 }
217                 
218                 it = parameters.find("fastq");
219                                 //user has given a template file
220                                 if(it != parameters.end()){
221                                         path = m->hasPath(it->second);
222                                         //if the user has not given a path then, add inputdir. else leave path alone.
223                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
224                                 }
225                         }
226
227                         
228                         //check for required parameters
229                         accnosfile = validParameter.validFile(parameters, "accnos", true);
230                         if (accnosfile == "not open") { abort = true; }
231                         else if (accnosfile == "not found") {  
232                                 accnosfile = m->getAccnosFile(); 
233                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
234                                 else { 
235                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
236                                         abort = true;
237                                 }  
238                         }else { m->setAccnosFile(accnosfile); } 
239                         
240                         fastafile = validParameter.validFile(parameters, "fasta", true);
241                         if (fastafile == "not open") { fastafile = ""; abort = true; }
242                         else if (fastafile == "not found") {  fastafile = "";  }        
243                         else { m->setFastaFile(fastafile); }
244                                                                    
245                         namefile = validParameter.validFile(parameters, "name", true);
246                         if (namefile == "not open") { namefile = ""; abort = true; }
247                         else if (namefile == "not found") {  namefile = "";  }  
248                         else { m->setNameFile(namefile); } 
249                                                                    
250                         groupfile = validParameter.validFile(parameters, "group", true);
251                         if (groupfile == "not open") { abort = true; }
252                         else if (groupfile == "not found") {  groupfile = "";  }
253                         else { m->setGroupFile(groupfile); }
254                         
255                         alignfile = validParameter.validFile(parameters, "alignreport", true);
256                         if (alignfile == "not open") { abort = true; }
257                         else if (alignfile == "not found") {  alignfile = "";  }
258                         
259                         listfile = validParameter.validFile(parameters, "list", true);
260                         if (listfile == "not open") { abort = true; }
261                         else if (listfile == "not found") {  listfile = "";  }
262                         else { m->setListFile(listfile); }
263                         
264                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
265                         if (taxfile == "not open") { abort = true; }
266                         else if (taxfile == "not found") {  taxfile = "";  }
267                         else { m->setTaxonomyFile(taxfile); }
268                         
269                         qualfile = validParameter.validFile(parameters, "qfile", true);
270                         if (qualfile == "not open") { abort = true; }
271                         else if (qualfile == "not found") {  qualfile = "";  }                  
272                         else { m->setQualFile(qualfile); }
273             
274             fastqfile = validParameter.validFile(parameters, "fastq", true);
275                         if (fastqfile == "not open") { abort = true; }
276                         else if (fastqfile == "not found") {  fastqfile = "";  }
277                         
278                         string usedDups = "true";
279                         string temp = validParameter.validFile(parameters, "dups", false);      
280                         if (temp == "not found") { 
281                                 if (namefile != "") {  temp = "true";                                   }
282                                 else                            {  temp = "false"; usedDups = "";       }
283                         }
284                         dups = m->isTrue(temp);
285             
286             countfile = validParameter.validFile(parameters, "count", true);
287             if (countfile == "not open") { countfile = ""; abort = true; }
288             else if (countfile == "not found") { countfile = "";  }     
289             else { m->setCountTableFile(countfile); }
290             
291             if ((namefile != "") && (countfile != "")) {
292                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
293             }
294             
295             if ((groupfile != "") && (countfile != "")) {
296                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
297             }
298                         
299                         if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport, fastq or list."); m->mothurOutEndLine(); abort = true; }
300                         
301             if (countfile == "") {
302                 if ((fastafile != "") && (namefile == "")) {
303                     vector<string> files; files.push_back(fastafile);
304                     parser.getNameFile(files);
305                 }
306             }
307                 }
308
309         }
310         catch(exception& e) {
311                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
312                 exit(1);
313         }
314 }
315 //**********************************************************************************************************************
316
317 int RemoveSeqsCommand::execute(){
318         try {
319                 
320                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
321                 
322                 //get names you want to keep
323                 names = m->readAccnos(accnosfile);
324                 
325                 if (m->control_pressed) { return 0; }
326         
327         if (countfile != "") {
328             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
329                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
330             }
331         }
332                 
333                 //read through the correct file and output lines you want to keep
334                 if (namefile != "")                     {               readName();             }
335                 if (fastafile != "")            {               readFasta();    }
336         if (fastqfile != "")            {               readFastq();            }
337                 if (groupfile != "")            {               readGroup();    }
338                 if (alignfile != "")            {               readAlign();    }
339                 if (listfile != "")                     {               readList();             }
340                 if (taxfile != "")                      {               readTax();              }
341                 if (qualfile != "")                     {               readQual();             }
342         if (countfile != "")            {               readCount();            }
343                 
344                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
345         
346                 if (outputNames.size() != 0) {
347                         m->mothurOutEndLine();
348                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
349                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
350                         m->mothurOutEndLine();
351                         
352                         //set fasta file as new current fastafile
353                         string current = "";
354                         itTypes = outputTypes.find("fasta");
355                         if (itTypes != outputTypes.end()) {
356                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
357                         }
358                         
359                         itTypes = outputTypes.find("name");
360                         if (itTypes != outputTypes.end()) {
361                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
362                         }
363                         
364                         itTypes = outputTypes.find("group");
365                         if (itTypes != outputTypes.end()) {
366                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
367                         }
368                         
369                         itTypes = outputTypes.find("list");
370                         if (itTypes != outputTypes.end()) {
371                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
372                         }
373                         
374                         itTypes = outputTypes.find("taxonomy");
375                         if (itTypes != outputTypes.end()) {
376                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
377                         }
378                         
379                         itTypes = outputTypes.find("qfile");
380                         if (itTypes != outputTypes.end()) {
381                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
382                         }       
383             
384             itTypes = outputTypes.find("count");
385                         if (itTypes != outputTypes.end()) {
386                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
387                         }
388                 }
389                 
390                 return 0;               
391         }
392
393         catch(exception& e) {
394                 m->errorOut(e, "RemoveSeqsCommand", "execute");
395                 exit(1);
396         }
397 }
398
399 //**********************************************************************************************************************
400 int RemoveSeqsCommand::readFasta(){
401         try {
402                 string thisOutputDir = outputDir;
403                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
404                 map<string, string> variables; 
405         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
406         variables["[extension]"] = m->getExtension(fastafile);
407                 string outputFileName = getOutputFileName("fasta", variables);
408                 
409                 ofstream out;
410                 m->openOutputFile(outputFileName, out);
411                 
412                 ifstream in;
413                 m->openInputFile(fastafile, in);
414                 string name;
415                 
416                 bool wroteSomething = false;
417                 int removedCount = 0;
418                 
419                 while(!in.eof()){
420                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
421                         
422                         Sequence currSeq(in);
423             
424             if (!dups) {//adjust name if needed
425                 map<string, string>::iterator it = uniqueMap.find(currSeq.getName());
426                 if (it != uniqueMap.end()) { currSeq.setName(it->second); }
427             }
428
429                         name = currSeq.getName();
430                         
431                         if (name != "") {
432                                 //if this name is in the accnos file
433                                 if (names.count(name) == 0) {
434                                         wroteSomething = true;
435                                         
436                     currSeq.printSequence(out);
437                                 }else {  removedCount++;  }
438                         }
439                         m->gobble(in);
440                 }
441                 in.close();     
442                 out.close();
443                 
444                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
445                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
446                 
447                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
448                 
449                 return 0;
450                 
451         }
452         catch(exception& e) {
453                 m->errorOut(e, "RemoveSeqsCommand", "readFasta");
454                 exit(1);
455         }
456 }
457 //**********************************************************************************************************************
458 int RemoveSeqsCommand::readFastq(){
459         try {
460                 bool wroteSomething = false;
461                 int removedCount = 0;
462         
463                 ifstream in;
464                 m->openInputFile(fastqfile, in);
465                 
466                 string thisOutputDir = outputDir;
467                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastqfile);  }
468                 map<string, string> variables;
469         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
470         variables["[extension]"] = m->getExtension(fastqfile);
471                 string outputFileName = getOutputFileName("fastq", variables);
472                 ofstream out;
473                 m->openOutputFile(outputFileName, out);
474         
475                 
476                 while(!in.eof()){
477                         
478                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
479                         
480                         //read sequence name
481                         string input = m->getline(in); m->gobble(in);
482                         
483             string outputString = input + "\n";
484             
485                         if (input[0] == '@') {
486                 //get rest of lines
487                 outputString += m->getline(in) + "\n"; m->gobble(in);
488                 outputString += m->getline(in) + "\n"; m->gobble(in);
489                 outputString += m->getline(in) + "\n"; m->gobble(in);
490                 
491                 vector<string> splits = m->splitWhiteSpace(input);
492                 string name = splits[0];
493                 name = name.substr(1);
494                 m->checkName(name);
495                 
496                 if (names.count(name) == 0) {
497                                         wroteSomething = true;
498                     out << outputString;
499                 }else { removedCount++; }
500             }
501             
502                         m->gobble(in);
503                 }
504                 in.close();
505                 out.close();
506                 
507                 
508                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
509                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
510                 
511                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
512
513                 
514                 return 0;
515         
516         }
517         catch(exception& e) {
518                 m->errorOut(e, "RemoveSeqsCommand", "readFastq");
519                 exit(1);
520         }
521 }
522 //**********************************************************************************************************************
523 int RemoveSeqsCommand::readQual(){
524         try {
525                 string thisOutputDir = outputDir;
526                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
527                 map<string, string> variables; 
528         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(qualfile));
529         variables["[extension]"] = m->getExtension(qualfile);
530                 string outputFileName = getOutputFileName("qfile", variables);
531                 ofstream out;
532                 m->openOutputFile(outputFileName, out);
533                 
534                 
535                 ifstream in;
536                 m->openInputFile(qualfile, in);
537                 string name;
538                 
539                 bool wroteSomething = false;
540                 int removedCount = 0;
541                 
542                 
543                 while(!in.eof()){       
544                         string saveName = "";
545                         string name = "";
546                         string scores = "";
547                         
548                         in >> name; 
549                         
550                         if (name.length() != 0) { 
551                                 saveName = name.substr(1);
552                                 while (!in.eof())       {       
553                                         char c = in.get(); 
554                                         if (c == 10 || c == 13 || c == -1){     break;  }
555                                         else { name += c; }     
556                                 } 
557                                 m->gobble(in);
558                         }
559                         
560                         while(in){
561                                 char letter= in.get();
562                                 if(letter == '>'){      in.putback(letter);     break;  }
563                                 else{ scores += letter; }
564                         }
565                         
566                         m->gobble(in);
567                         
568             if (!dups) {//adjust name if needed
569                 map<string, string>::iterator it = uniqueMap.find(saveName);
570                 if (it != uniqueMap.end()) { name = ">" + it->second; saveName = it->second; }
571             }
572             
573                         if (names.count(saveName) == 0) {
574                                 wroteSomething = true;
575                                 
576                                 out << name << endl << scores;
577                         }else {  removedCount++;  }
578                         
579                         m->gobble(in);
580                 }
581                 in.close();
582                 out.close();
583                 
584                 
585                 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
586                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
587                 
588                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your quality file."); m->mothurOutEndLine();
589                 
590                 return 0;
591                 
592         }
593         catch(exception& e) {
594                 m->errorOut(e, "RemoveSeqsCommand", "readQual");
595                 exit(1);
596         }
597 }
598 //**********************************************************************************************************************
599 int RemoveSeqsCommand::readCount(){
600         try {
601         
602                 string thisOutputDir = outputDir;
603                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
604                 map<string, string> variables; 
605                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
606         variables["[extension]"] = m->getExtension(countfile);
607                 string outputFileName = getOutputFileName("count", variables);
608                 
609                 ofstream out;
610                 m->openOutputFile(outputFileName, out);
611                 
612                 ifstream in;
613                 m->openInputFile(countfile, in);
614                 
615                 bool wroteSomething = false;
616                 int removedCount = 0;
617                 
618         string headers = m->getline(in); m->gobble(in);
619         out << headers << endl;
620         
621         string name, rest; int thisTotal;
622         while (!in.eof()) {
623             
624             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
625             
626             in >> name; m->gobble(in); 
627             in >> thisTotal; m->gobble(in);
628             rest = m->getline(in); m->gobble(in);
629             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
630             
631             if (names.count(name) == 0) {
632                 out << name << '\t' << thisTotal << '\t' << rest << endl;
633                 wroteSomething = true;
634             }else { removedCount += thisTotal; }
635         }
636         in.close();
637                 out.close();
638         
639         //check for groups that have been eliminated
640         CountTable ct;
641         if (ct.testGroups(outputFileName)) {
642             ct.readTable(outputFileName, true, false);
643             ct.printTable(outputFileName);
644         }
645
646                 
647                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
648                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
649                 
650                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
651         
652                 return 0;
653         }
654         catch(exception& e) {
655                 m->errorOut(e, "RemoveSeqsCommand", "readCount");
656                 exit(1);
657         }
658 }
659 //**********************************************************************************************************************
660 int RemoveSeqsCommand::readList(){
661         try {
662                 string thisOutputDir = outputDir;
663                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
664                 map<string, string> variables; 
665                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
666         variables["[extension]"] = m->getExtension(listfile);
667                 string outputFileName = getOutputFileName("list", variables);   
668                 ofstream out;
669                 m->openOutputFile(outputFileName, out);
670                 
671                 ifstream in;
672                 m->openInputFile(listfile, in);
673                 
674                 bool wroteSomething = false;
675                 int removedCount = 0;
676                 
677                 while(!in.eof()){
678                         
679                         removedCount = 0;
680                         
681                         //read in list vector
682                         ListVector list(in);
683                         
684                         //make a new list vector
685                         ListVector newList;
686                         newList.setLabel(list.getLabel());
687                         
688                         //for each bin
689                         for (int i = 0; i < list.getNumBins(); i++) {
690                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
691                         
692                                 //parse out names that are in accnos file
693                                 string binnames = list.get(i);
694                                 
695                                 string newNames = "";
696                                 while (binnames.find_first_of(',') != -1) { 
697                                         string name = binnames.substr(0,binnames.find_first_of(','));
698                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
699                                         
700                                         //if that name is in the .accnos file, add it
701                                         if (names.count(name) == 0) {  newNames += name + ",";  }
702                                         else {  removedCount++;  }
703                                 }
704                         
705                                 //get last name
706                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
707                                 else {  removedCount++;  }
708
709                                 //if there are names in this bin add to new list
710                                 if (newNames != "") {  
711                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
712                                         newList.push_back(newNames);    
713                                 }
714                         }
715                                 
716                         //print new listvector
717                         if (newList.getNumBins() != 0) {
718                                 wroteSomething = true;
719                                 newList.print(out);
720                         }
721                         
722                         m->gobble(in);
723                 }
724                 in.close();     
725                 out.close();
726                 
727                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
728                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
729                 
730                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
731                 
732                 return 0;
733
734         }
735         catch(exception& e) {
736                 m->errorOut(e, "RemoveSeqsCommand", "readList");
737                 exit(1);
738         }
739 }
740 //**********************************************************************************************************************
741 int RemoveSeqsCommand::readName(){
742         try {
743                 string thisOutputDir = outputDir;
744                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
745                 map<string, string> variables; 
746                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
747         variables["[extension]"] = m->getExtension(namefile);
748                 string outputFileName = getOutputFileName("name", variables);
749                 ofstream out;
750                 m->openOutputFile(outputFileName, out);
751
752                 ifstream in;
753                 m->openInputFile(namefile, in);
754                 string name, firstCol, secondCol;
755                 
756                 bool wroteSomething = false;
757                 int removedCount = 0;
758                 
759                 while(!in.eof()){
760                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
761                         
762                         in >> firstCol;         m->gobble(in);          
763                         in >> secondCol;                        
764                         
765                         vector<string> parsedNames;
766                         m->splitAtComma(secondCol, parsedNames);
767                         
768                         vector<string> validSecond;  validSecond.clear();
769                         for (int i = 0; i < parsedNames.size(); i++) {
770                                 if (names.count(parsedNames[i]) == 0) {
771                                         validSecond.push_back(parsedNames[i]);
772                                 }
773                         }
774                         
775                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
776                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
777                                 removedCount += parsedNames.size();
778                         }else {
779                                 removedCount += parsedNames.size()-validSecond.size();
780                                 //if the name in the first column is in the set then print it and any other names in second column also in set
781                                 if (names.count(firstCol) == 0) {
782                                         
783                                         wroteSomething = true;
784                                         
785                                         out << firstCol << '\t';
786                                         
787                                         //you know you have at least one valid second since first column is valid
788                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
789                                         out << validSecond[validSecond.size()-1] << endl;
790                                         
791                                         //make first name in set you come to first column and then add the remaining names to second column
792                                 }else {
793                                         
794                                         //you want part of this row
795                                         if (validSecond.size() != 0) {
796                                                 
797                                                 wroteSomething = true;
798                                                 
799                                                 out << validSecond[0] << '\t';
800                         //we are changing the unique name in the fasta file
801                         uniqueMap[firstCol] = validSecond[0];
802                                                 
803                                                 //you know you have at least one valid second since first column is valid
804                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
805                                                 out << validSecond[validSecond.size()-1] << endl;
806                                         }
807                                 }
808                         }
809                         m->gobble(in);
810                 }
811                 in.close();
812                 out.close();
813                 
814                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
815                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
816                 
817                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
818                 
819                 return 0;
820         }
821         catch(exception& e) {
822                 m->errorOut(e, "RemoveSeqsCommand", "readName");
823                 exit(1);
824         }
825 }
826
827 //**********************************************************************************************************************
828 int RemoveSeqsCommand::readGroup(){
829         try {
830                 string thisOutputDir = outputDir;
831                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
832                 map<string, string> variables; 
833                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
834         variables["[extension]"] = m->getExtension(groupfile);
835                 string outputFileName = getOutputFileName("group", variables);  
836                 ofstream out;
837                 m->openOutputFile(outputFileName, out);
838
839                 ifstream in;
840                 m->openInputFile(groupfile, in);
841                 string name, group;
842                 
843                 bool wroteSomething = false;
844                 int removedCount = 0;
845                 
846                 while(!in.eof()){
847                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
848                         
849                         in >> name;                             //read from first column
850                         in >> group;                    //read from second column
851                         
852                         //if this name is in the accnos file
853                         if (names.count(name) == 0) {
854                                 wroteSomething = true;
855                                 out << name << '\t' << group << endl;
856                         }else {  removedCount++;  }
857                                         
858                         m->gobble(in);
859                 }
860                 in.close();
861                 out.close();
862                 
863                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
864                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
865                 
866                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
867
868                 
869                 return 0;
870         }
871         catch(exception& e) {
872                 m->errorOut(e, "RemoveSeqsCommand", "readGroup");
873                 exit(1);
874         }
875 }
876 //**********************************************************************************************************************
877 int RemoveSeqsCommand::readTax(){
878         try {
879                 string thisOutputDir = outputDir;
880                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
881                 map<string, string> variables; 
882                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
883         variables["[extension]"] = m->getExtension(taxfile);
884                 string outputFileName = getOutputFileName("taxonomy", variables);
885                 ofstream out;
886                 m->openOutputFile(outputFileName, out);
887
888                 ifstream in;
889                 m->openInputFile(taxfile, in);
890                 string name, tax;
891                 
892                 bool wroteSomething = false;
893                 int removedCount = 0;
894                 
895                 while(!in.eof()){
896                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
897                         
898                         in >> name;     m->gobble(in);                  //read from first column
899                         in >> tax;                      //read from second column
900                         
901             if (!dups) {//adjust name if needed
902                 map<string, string>::iterator it = uniqueMap.find(name);
903                 if (it != uniqueMap.end()) { name = it->second; }
904             }
905             
906                         //if this name is in the accnos file
907                         if (names.count(name) == 0) {
908                                 wroteSomething = true;
909             
910                                 out << name << '\t' << tax << endl;
911                         }else {  removedCount++;  }
912                                         
913                         m->gobble(in);
914                 }
915                 in.close();
916                 out.close();
917                 
918                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
919                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
920                 
921                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
922                 
923                 return 0;
924         }
925         catch(exception& e) {
926                 m->errorOut(e, "RemoveSeqsCommand", "readTax");
927                 exit(1);
928         }
929 }
930 //**********************************************************************************************************************
931 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
932 int RemoveSeqsCommand::readAlign(){
933         try {
934                 string thisOutputDir = outputDir;
935                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
936                 map<string, string> variables; 
937                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
938                 string outputFileName = getOutputFileName("alignreport", variables);
939                 
940                 ofstream out;
941                 m->openOutputFile(outputFileName, out);
942
943                 ifstream in;
944                 m->openInputFile(alignfile, in);
945                 string name, junk;
946                 
947                 bool wroteSomething = false;
948                 int removedCount = 0;
949                 
950                 //read column headers
951                 for (int i = 0; i < 16; i++) {  
952                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
953                         else                    {       break;                  }
954                 }
955                 out << endl;
956                 
957                 while(!in.eof()){
958                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
959                         
960                         in >> name;                             //read from first column
961             
962             if (!dups) {//adjust name if needed
963                 map<string, string>::iterator it = uniqueMap.find(name);
964                 if (it != uniqueMap.end()) { name = it->second; }
965             }
966                         
967                         //if this name is in the accnos file
968                         if (names.count(name) == 0) {
969                                 wroteSomething = true;
970                                 
971                                 out << name << '\t';
972                                 
973                                 //read rest
974                                 for (int i = 0; i < 15; i++) {  
975                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
976                                         else                    {       break;                  }
977                                 }
978                                 out << endl;
979                                 
980                         }else {//still read just don't do anything with it
981                                 removedCount++;  
982                                 
983                                 //read rest
984                                 for (int i = 0; i < 15; i++) {  
985                                         if (!in.eof())  {       in >> junk;             }
986                                         else                    {       break;                  }
987                                 }
988                         }
989                         
990                         m->gobble(in);
991                 }
992                 in.close();
993                 out.close();
994                 
995                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
996                 outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName);
997                 
998                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
999
1000                 
1001                 return 0;
1002                 
1003         }
1004         catch(exception& e) {
1005                 m->errorOut(e, "RemoveSeqsCommand", "readAlign");
1006                 exit(1);
1007         }
1008 }
1009 //**********************************************************************************************************************
1010
1011