]> git.donarmstrong.com Git - mothur.git/blob - removeseqscommand.cpp
fixes while testing 1.33.0
[mothur.git] / removeseqscommand.cpp
1 /*
2  *  removeseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "removeseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> RemoveSeqsCommand::setParameters(){      
17         try {
18         CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
20         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
24                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy);
25                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
26                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none","qfile",false,false); parameters.push_back(pqfile);
27                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
28                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "RemoveSeqsCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string RemoveSeqsCommand::getHelpString(){      
43         try {
44                 string helpString = "";
45                 helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
46                 helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
47                 helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport, fastq and dups.  You must provide accnos and at least one of the file parameters.\n";
48                 helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
49                 helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
50                 helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "RemoveSeqsCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string RemoveSeqsCommand::getOutputPattern(string type) {
61     try {
62         string pattern = "";
63         
64         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
65         else if (type == "fastq")       {   pattern = "[filename],pick,[extension]";    }
66         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
67         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
68         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
69         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
70         else if (type == "list")        {   pattern = "[filename],[distance],pick,[extension]";    }
71         else if (type == "qfile")       {   pattern = "[filename],pick,[extension]";    }
72         else if (type == "alignreport")      {   pattern = "[filename],pick.align.report";    }
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "GetSeqsCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 RemoveSeqsCommand::RemoveSeqsCommand(){ 
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["fasta"] = tempOutNames;
89         outputTypes["fastq"] = tempOutNames;
90                 outputTypes["taxonomy"] = tempOutNames;
91                 outputTypes["name"] = tempOutNames;
92                 outputTypes["group"] = tempOutNames;
93                 outputTypes["alignreport"] = tempOutNames;
94                 outputTypes["list"] = tempOutNames;
95                 outputTypes["qfile"] = tempOutNames;
96         outputTypes["count"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
105         try {
106                 abort = false; calledHelp = false;   
107                 
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         
118                         ValidParameters validParameter;
119                         map<string,string>::iterator it;
120                         
121                         //check to make sure all parameters are valid for command
122                         for (it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["fasta"] = tempOutNames;
129             outputTypes["fastq"] = tempOutNames;
130                         outputTypes["taxonomy"] = tempOutNames;
131                         outputTypes["name"] = tempOutNames;
132                         outputTypes["group"] = tempOutNames;
133                         outputTypes["alignreport"] = tempOutNames;
134                         outputTypes["list"] = tempOutNames;
135                         outputTypes["qfile"] = tempOutNames;
136             outputTypes["count"] = tempOutNames;
137                         
138                         //if the user changes the output directory command factory will send this info to us in the output parameter 
139                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
140                         
141                         //if the user changes the input directory command factory will send this info to us in the output parameter 
142                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
143                         if (inputDir == "not found"){   inputDir = "";          }
144                         else {
145                                 string path;
146                                 it = parameters.find("alignreport");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
152                                 }
153                                 
154                                 it = parameters.find("fasta");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
160                                 }
161                                 
162                                 it = parameters.find("accnos");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
168                                 }
169                                 
170                                 it = parameters.find("list");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
176                                 }
177                                 
178                                 it = parameters.find("name");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
184                                 }
185                                 
186                                 it = parameters.find("group");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
192                                 }
193                                 
194                                 it = parameters.find("taxonomy");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
200                                 }
201                                 
202                                 it = parameters.find("qfile");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
208                                 }
209                 
210                 it = parameters.find("count");
211                                 //user has given a template file
212                                 if(it != parameters.end()){ 
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
216                                 }
217                 
218                 it = parameters.find("fastq");
219                                 //user has given a template file
220                                 if(it != parameters.end()){
221                                         path = m->hasPath(it->second);
222                                         //if the user has not given a path then, add inputdir. else leave path alone.
223                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
224                                 }
225                         }
226
227                         
228                         //check for required parameters
229                         accnosfile = validParameter.validFile(parameters, "accnos", true);
230                         if (accnosfile == "not open") { abort = true; }
231                         else if (accnosfile == "not found") {  
232                                 accnosfile = m->getAccnosFile(); 
233                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
234                                 else { 
235                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
236                                         abort = true;
237                                 }  
238                         }else { m->setAccnosFile(accnosfile); } 
239                         
240                         fastafile = validParameter.validFile(parameters, "fasta", true);
241                         if (fastafile == "not open") { fastafile = ""; abort = true; }
242                         else if (fastafile == "not found") {  fastafile = "";  }        
243                         else { m->setFastaFile(fastafile); }
244                                                                    
245                         namefile = validParameter.validFile(parameters, "name", true);
246                         if (namefile == "not open") { namefile = ""; abort = true; }
247                         else if (namefile == "not found") {  namefile = "";  }  
248                         else { m->setNameFile(namefile); } 
249                                                                    
250                         groupfile = validParameter.validFile(parameters, "group", true);
251                         if (groupfile == "not open") { abort = true; }
252                         else if (groupfile == "not found") {  groupfile = "";  }
253                         else { m->setGroupFile(groupfile); }
254                         
255                         alignfile = validParameter.validFile(parameters, "alignreport", true);
256                         if (alignfile == "not open") { abort = true; }
257                         else if (alignfile == "not found") {  alignfile = "";  }
258                         
259                         listfile = validParameter.validFile(parameters, "list", true);
260                         if (listfile == "not open") { abort = true; }
261                         else if (listfile == "not found") {  listfile = "";  }
262                         else { m->setListFile(listfile); }
263                         
264                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
265                         if (taxfile == "not open") { abort = true; }
266                         else if (taxfile == "not found") {  taxfile = "";  }
267                         else { m->setTaxonomyFile(taxfile); }
268                         
269                         qualfile = validParameter.validFile(parameters, "qfile", true);
270                         if (qualfile == "not open") { abort = true; }
271                         else if (qualfile == "not found") {  qualfile = "";  }                  
272                         else { m->setQualFile(qualfile); }
273             
274             fastqfile = validParameter.validFile(parameters, "fastq", true);
275                         if (fastqfile == "not open") { abort = true; }
276                         else if (fastqfile == "not found") {  fastqfile = "";  }
277                         
278                         string usedDups = "true";
279                         string temp = validParameter.validFile(parameters, "dups", false);      
280                         if (temp == "not found") { 
281                                 if (namefile != "") {  temp = "true";                                   }
282                                 else                            {  temp = "false"; usedDups = "";       }
283                         }
284                         dups = m->isTrue(temp);
285             
286             countfile = validParameter.validFile(parameters, "count", true);
287             if (countfile == "not open") { countfile = ""; abort = true; }
288             else if (countfile == "not found") { countfile = "";  }     
289             else { m->setCountTableFile(countfile); }
290             
291             if ((namefile != "") && (countfile != "")) {
292                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
293             }
294             
295             if ((groupfile != "") && (countfile != "")) {
296                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
297             }
298                         
299                         if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport, fastq or list."); m->mothurOutEndLine(); abort = true; }
300                         
301             if (countfile == "") {
302                 if ((fastafile != "") && (namefile == "")) {
303                     vector<string> files; files.push_back(fastafile);
304                     parser.getNameFile(files);
305                 }
306             }
307                 }
308
309         }
310         catch(exception& e) {
311                 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
312                 exit(1);
313         }
314 }
315 //**********************************************************************************************************************
316
317 int RemoveSeqsCommand::execute(){
318         try {
319                 
320                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
321                 
322                 //get names you want to keep
323                 names = m->readAccnos(accnosfile);
324                 
325                 if (m->control_pressed) { return 0; }
326         
327         if (countfile != "") {
328             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
329                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
330             }
331         }
332                 
333                 //read through the correct file and output lines you want to keep
334                 if (namefile != "")                     {               readName();             }
335                 if (fastafile != "")            {               readFasta();    }
336         if (fastqfile != "")            {               readFastq();            }
337                 if (groupfile != "")            {               readGroup();    }
338                 if (alignfile != "")            {               readAlign();    }
339                 if (listfile != "")                     {               readList();             }
340                 if (taxfile != "")                      {               readTax();              }
341                 if (qualfile != "")                     {               readQual();             }
342         if (countfile != "")            {               readCount();            }
343                 
344                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
345         
346                 if (outputNames.size() != 0) {
347                         m->mothurOutEndLine();
348                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
349                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
350                         m->mothurOutEndLine();
351                         
352                         //set fasta file as new current fastafile
353                         string current = "";
354                         itTypes = outputTypes.find("fasta");
355                         if (itTypes != outputTypes.end()) {
356                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
357                         }
358                         
359                         itTypes = outputTypes.find("name");
360                         if (itTypes != outputTypes.end()) {
361                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
362                         }
363                         
364                         itTypes = outputTypes.find("group");
365                         if (itTypes != outputTypes.end()) {
366                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
367                         }
368                         
369                         itTypes = outputTypes.find("list");
370                         if (itTypes != outputTypes.end()) {
371                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
372                         }
373                         
374                         itTypes = outputTypes.find("taxonomy");
375                         if (itTypes != outputTypes.end()) {
376                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
377                         }
378                         
379                         itTypes = outputTypes.find("qfile");
380                         if (itTypes != outputTypes.end()) {
381                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
382                         }       
383             
384             itTypes = outputTypes.find("count");
385                         if (itTypes != outputTypes.end()) {
386                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
387                         }
388                 }
389                 
390                 return 0;               
391         }
392
393         catch(exception& e) {
394                 m->errorOut(e, "RemoveSeqsCommand", "execute");
395                 exit(1);
396         }
397 }
398
399 //**********************************************************************************************************************
400 int RemoveSeqsCommand::readFasta(){
401         try {
402                 string thisOutputDir = outputDir;
403                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
404                 map<string, string> variables; 
405         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
406         variables["[extension]"] = m->getExtension(fastafile);
407                 string outputFileName = getOutputFileName("fasta", variables);
408                 
409                 ofstream out;
410                 m->openOutputFile(outputFileName, out);
411                 
412                 ifstream in;
413                 m->openInputFile(fastafile, in);
414                 string name;
415                 
416                 bool wroteSomething = false;
417                 int removedCount = 0;
418                 
419                 while(!in.eof()){
420                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
421                         
422                         Sequence currSeq(in);
423             
424             if (!dups) {//adjust name if needed
425                 map<string, string>::iterator it = uniqueMap.find(currSeq.getName());
426                 if (it != uniqueMap.end()) { currSeq.setName(it->second); }
427             }
428
429                         name = currSeq.getName();
430                         
431                         if (name != "") {
432                                 //if this name is in the accnos file
433                                 if (names.count(name) == 0) {
434                                         wroteSomething = true;
435                                         
436                     currSeq.printSequence(out);
437                                 }else {  removedCount++;  }
438                         }
439                         m->gobble(in);
440                 }
441                 in.close();     
442                 out.close();
443                 
444                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
445                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
446                 
447                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
448                 
449                 return 0;
450                 
451         }
452         catch(exception& e) {
453                 m->errorOut(e, "RemoveSeqsCommand", "readFasta");
454                 exit(1);
455         }
456 }
457 //**********************************************************************************************************************
458 int RemoveSeqsCommand::readFastq(){
459         try {
460                 bool wroteSomething = false;
461                 int removedCount = 0;
462         
463                 ifstream in;
464                 m->openInputFile(fastqfile, in);
465                 
466                 string thisOutputDir = outputDir;
467                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastqfile);  }
468                 map<string, string> variables;
469         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
470         variables["[extension]"] = m->getExtension(fastqfile);
471                 string outputFileName = getOutputFileName("fastq", variables);
472                 ofstream out;
473                 m->openOutputFile(outputFileName, out);
474         
475                 
476                 while(!in.eof()){
477                         
478                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
479                         
480                         //read sequence name
481                         string input = m->getline(in); m->gobble(in);
482                         
483             string outputString = input + "\n";
484             
485                         if (input[0] == '@') {
486                 //get rest of lines
487                 outputString += m->getline(in) + "\n"; m->gobble(in);
488                 outputString += m->getline(in) + "\n"; m->gobble(in);
489                 outputString += m->getline(in) + "\n"; m->gobble(in);
490                 
491                 vector<string> splits = m->splitWhiteSpace(input);
492                 string name = splits[0];
493                 name = name.substr(1);
494                 m->checkName(name);
495                 
496                 if (names.count(name) == 0) {
497                                         wroteSomething = true;
498                     out << outputString;
499                 }else { removedCount++; }
500             }
501             
502                         m->gobble(in);
503                 }
504                 in.close();
505                 out.close();
506                 
507                 
508                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
509                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
510                 
511                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
512
513                 
514                 return 0;
515         
516         }
517         catch(exception& e) {
518                 m->errorOut(e, "RemoveSeqsCommand", "readFastq");
519                 exit(1);
520         }
521 }
522 //**********************************************************************************************************************
523 int RemoveSeqsCommand::readQual(){
524         try {
525                 string thisOutputDir = outputDir;
526                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
527                 map<string, string> variables; 
528         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(qualfile));
529         variables["[extension]"] = m->getExtension(qualfile);
530                 string outputFileName = getOutputFileName("qfile", variables);
531                 ofstream out;
532                 m->openOutputFile(outputFileName, out);
533                 
534                 
535                 ifstream in;
536                 m->openInputFile(qualfile, in);
537                 string name;
538                 
539                 bool wroteSomething = false;
540                 int removedCount = 0;
541                 
542                 
543                 while(!in.eof()){       
544                         string saveName = "";
545                         string name = "";
546                         string scores = "";
547                         
548                         in >> name; 
549                         
550                         if (name.length() != 0) { 
551                                 saveName = name.substr(1);
552                                 while (!in.eof())       {       
553                                         char c = in.get(); 
554                                         if (c == 10 || c == 13 || c == -1){     break;  }
555                                         else { name += c; }     
556                                 } 
557                                 m->gobble(in);
558                         }
559                         
560                         while(in){
561                                 char letter= in.get();
562                                 if(letter == '>'){      in.putback(letter);     break;  }
563                                 else{ scores += letter; }
564                         }
565                         
566                         m->gobble(in);
567                         
568             if (!dups) {//adjust name if needed
569                 map<string, string>::iterator it = uniqueMap.find(saveName);
570                 if (it != uniqueMap.end()) { name = ">" + it->second; saveName = it->second; }
571             }
572             
573                         if (names.count(saveName) == 0) {
574                                 wroteSomething = true;
575                                 
576                                 out << name << endl << scores;
577                         }else {  removedCount++;  }
578                         
579                         m->gobble(in);
580                 }
581                 in.close();
582                 out.close();
583                 
584                 
585                 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
586                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
587                 
588                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your quality file."); m->mothurOutEndLine();
589                 
590                 return 0;
591                 
592         }
593         catch(exception& e) {
594                 m->errorOut(e, "RemoveSeqsCommand", "readQual");
595                 exit(1);
596         }
597 }
598 //**********************************************************************************************************************
599 int RemoveSeqsCommand::readCount(){
600         try {
601         
602                 string thisOutputDir = outputDir;
603                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
604                 map<string, string> variables; 
605                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
606         variables["[extension]"] = m->getExtension(countfile);
607                 string outputFileName = getOutputFileName("count", variables);
608                 
609                 ofstream out;
610                 m->openOutputFile(outputFileName, out);
611                 
612                 ifstream in;
613                 m->openInputFile(countfile, in);
614                 
615                 bool wroteSomething = false;
616                 int removedCount = 0;
617                 
618         string headers = m->getline(in); m->gobble(in);
619         out << headers << endl;
620         
621         string name, rest; int thisTotal;
622         while (!in.eof()) {
623             
624             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
625             
626             in >> name; m->gobble(in); 
627             in >> thisTotal; m->gobble(in);
628             rest = m->getline(in); m->gobble(in);
629             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
630             
631             if (names.count(name) == 0) {
632                 out << name << '\t' << thisTotal << '\t' << rest << endl;
633                 wroteSomething = true;
634             }else { removedCount += thisTotal; }
635         }
636         in.close();
637                 out.close();
638         
639         //check for groups that have been eliminated
640         CountTable ct;
641         if (ct.testGroups(outputFileName)) {
642             ct.readTable(outputFileName, true, false);
643             ct.printTable(outputFileName);
644         }
645
646                 
647                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
648                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
649                 
650                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
651         
652                 return 0;
653         }
654         catch(exception& e) {
655                 m->errorOut(e, "RemoveSeqsCommand", "readCount");
656                 exit(1);
657         }
658 }
659 //**********************************************************************************************************************
660 int RemoveSeqsCommand::readList(){
661         try {
662                 string thisOutputDir = outputDir;
663                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
664                 map<string, string> variables; 
665                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
666         variables["[extension]"] = m->getExtension(listfile);
667                                 
668                 ifstream in;
669                 m->openInputFile(listfile, in);
670                 
671                 bool wroteSomething = false;
672                 int removedCount = 0;
673                 
674                 while(!in.eof()){
675                         
676                         removedCount = 0;
677                         
678                         //read in list vector
679                         ListVector list(in);
680                         
681                         //make a new list vector
682                         ListVector newList;
683                         newList.setLabel(list.getLabel());
684             
685                         variables["[distance]"] = list.getLabel();
686             string outputFileName = getOutputFileName("list", variables);
687                         
688                         ofstream out;
689                         m->openOutputFile(outputFileName, out);
690                         outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
691             
692             vector<string> binLabels = list.getLabels();
693             vector<string> newBinLabels;
694             
695             if (m->control_pressed) { in.close(); out.close();  return 0; }
696
697                         //for each bin
698                         for (int i = 0; i < list.getNumBins(); i++) {
699                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
700                         
701                                 //parse out names that are in accnos file
702                                 string bin = list.get(i);
703                 vector<string> bnames;
704                 m->splitAtComma(bin, bnames);
705                                 
706                                 string newNames = "";
707                 for (int j = 0; j < bnames.size(); j++) {
708                                         string name = bnames[j];
709                     //if that name is in the .accnos file, add it
710                                         if (names.count(name) == 0) {  newNames += name + ",";  }
711                                         else {  removedCount++;  }
712                 }
713
714                                 //if there are names in this bin add to new list
715                                 if (newNames != "") {  
716                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
717                                         newList.push_back(newNames);
718                     newBinLabels.push_back(binLabels[i]);
719                                 }
720                         }
721                                 
722                         //print new listvector
723                         if (newList.getNumBins() != 0) {
724                                 wroteSomething = true;
725                                 newList.setLabels(newBinLabels);
726                 newList.printHeaders(out);
727                                 newList.print(out);
728
729                         }
730                         
731                         m->gobble(in);
732             out.close();
733                 }
734                 in.close();     
735                 
736                 
737                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
738                 
739                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
740                 
741                 return 0;
742
743         }
744         catch(exception& e) {
745                 m->errorOut(e, "RemoveSeqsCommand", "readList");
746                 exit(1);
747         }
748 }
749 //**********************************************************************************************************************
750 int RemoveSeqsCommand::readName(){
751         try {
752                 string thisOutputDir = outputDir;
753                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
754                 map<string, string> variables; 
755                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
756         variables["[extension]"] = m->getExtension(namefile);
757                 string outputFileName = getOutputFileName("name", variables);
758                 ofstream out;
759                 m->openOutputFile(outputFileName, out);
760
761                 ifstream in;
762                 m->openInputFile(namefile, in);
763                 string name, firstCol, secondCol;
764                 
765                 bool wroteSomething = false;
766                 int removedCount = 0;
767                 
768                 while(!in.eof()){
769                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
770                         
771                         in >> firstCol;         m->gobble(in);          
772                         in >> secondCol;                        
773                         
774                         vector<string> parsedNames;
775                         m->splitAtComma(secondCol, parsedNames);
776                         
777                         vector<string> validSecond;  validSecond.clear();
778                         for (int i = 0; i < parsedNames.size(); i++) {
779                                 if (names.count(parsedNames[i]) == 0) {
780                                         validSecond.push_back(parsedNames[i]);
781                                 }
782                         }
783                         
784                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
785                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
786                                 removedCount += parsedNames.size();
787                         }else {
788                                 removedCount += parsedNames.size()-validSecond.size();
789                                 //if the name in the first column is in the set then print it and any other names in second column also in set
790                                 if (names.count(firstCol) == 0) {
791                                         
792                                         wroteSomething = true;
793                                         
794                                         out << firstCol << '\t';
795                                         
796                                         //you know you have at least one valid second since first column is valid
797                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
798                                         out << validSecond[validSecond.size()-1] << endl;
799                                         
800                                         //make first name in set you come to first column and then add the remaining names to second column
801                                 }else {
802                                         
803                                         //you want part of this row
804                                         if (validSecond.size() != 0) {
805                                                 
806                                                 wroteSomething = true;
807                                                 
808                                                 out << validSecond[0] << '\t';
809                         //we are changing the unique name in the fasta file
810                         uniqueMap[firstCol] = validSecond[0];
811                                                 
812                                                 //you know you have at least one valid second since first column is valid
813                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
814                                                 out << validSecond[validSecond.size()-1] << endl;
815                                         }
816                                 }
817                         }
818                         m->gobble(in);
819                 }
820                 in.close();
821                 out.close();
822                 
823                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
824                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
825                 
826                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
827                 
828                 return 0;
829         }
830         catch(exception& e) {
831                 m->errorOut(e, "RemoveSeqsCommand", "readName");
832                 exit(1);
833         }
834 }
835
836 //**********************************************************************************************************************
837 int RemoveSeqsCommand::readGroup(){
838         try {
839                 string thisOutputDir = outputDir;
840                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
841                 map<string, string> variables; 
842                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
843         variables["[extension]"] = m->getExtension(groupfile);
844                 string outputFileName = getOutputFileName("group", variables);  
845                 ofstream out;
846                 m->openOutputFile(outputFileName, out);
847
848                 ifstream in;
849                 m->openInputFile(groupfile, in);
850                 string name, group;
851                 
852                 bool wroteSomething = false;
853                 int removedCount = 0;
854                 
855                 while(!in.eof()){
856                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
857                         
858                         in >> name;                             //read from first column
859                         in >> group;                    //read from second column
860                         
861                         //if this name is in the accnos file
862                         if (names.count(name) == 0) {
863                                 wroteSomething = true;
864                                 out << name << '\t' << group << endl;
865                         }else {  removedCount++;  }
866                                         
867                         m->gobble(in);
868                 }
869                 in.close();
870                 out.close();
871                 
872                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
873                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
874                 
875                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
876
877                 
878                 return 0;
879         }
880         catch(exception& e) {
881                 m->errorOut(e, "RemoveSeqsCommand", "readGroup");
882                 exit(1);
883         }
884 }
885 //**********************************************************************************************************************
886 int RemoveSeqsCommand::readTax(){
887         try {
888                 string thisOutputDir = outputDir;
889                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
890                 map<string, string> variables; 
891                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
892         variables["[extension]"] = m->getExtension(taxfile);
893                 string outputFileName = getOutputFileName("taxonomy", variables);
894                 ofstream out;
895                 m->openOutputFile(outputFileName, out);
896
897                 ifstream in;
898                 m->openInputFile(taxfile, in);
899                 string name, tax;
900                 
901                 bool wroteSomething = false;
902                 int removedCount = 0;
903                 
904                 while(!in.eof()){
905                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
906                         
907                         in >> name;     m->gobble(in);                  //read from first column
908                         in >> tax;                      //read from second column
909                         
910             if (!dups) {//adjust name if needed
911                 map<string, string>::iterator it = uniqueMap.find(name);
912                 if (it != uniqueMap.end()) { name = it->second; }
913             }
914             
915                         //if this name is in the accnos file
916                         if (names.count(name) == 0) {
917                                 wroteSomething = true;
918             
919                                 out << name << '\t' << tax << endl;
920                         }else {  removedCount++;  }
921                                         
922                         m->gobble(in);
923                 }
924                 in.close();
925                 out.close();
926                 
927                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
928                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
929                 
930                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
931                 
932                 return 0;
933         }
934         catch(exception& e) {
935                 m->errorOut(e, "RemoveSeqsCommand", "readTax");
936                 exit(1);
937         }
938 }
939 //**********************************************************************************************************************
940 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
941 int RemoveSeqsCommand::readAlign(){
942         try {
943                 string thisOutputDir = outputDir;
944                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
945                 map<string, string> variables; 
946                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
947                 string outputFileName = getOutputFileName("alignreport", variables);
948                 
949                 ofstream out;
950                 m->openOutputFile(outputFileName, out);
951
952                 ifstream in;
953                 m->openInputFile(alignfile, in);
954                 string name, junk;
955                 
956                 bool wroteSomething = false;
957                 int removedCount = 0;
958                 
959                 //read column headers
960                 for (int i = 0; i < 16; i++) {  
961                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
962                         else                    {       break;                  }
963                 }
964                 out << endl;
965                 
966                 while(!in.eof()){
967                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
968                         
969                         in >> name;                             //read from first column
970             
971             if (!dups) {//adjust name if needed
972                 map<string, string>::iterator it = uniqueMap.find(name);
973                 if (it != uniqueMap.end()) { name = it->second; }
974             }
975                         
976                         //if this name is in the accnos file
977                         if (names.count(name) == 0) {
978                                 wroteSomething = true;
979                                 
980                                 out << name << '\t';
981                                 
982                                 //read rest
983                                 for (int i = 0; i < 15; i++) {  
984                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
985                                         else                    {       break;                  }
986                                 }
987                                 out << endl;
988                                 
989                         }else {//still read just don't do anything with it
990                                 removedCount++;  
991                                 
992                                 //read rest
993                                 for (int i = 0; i < 15; i++) {  
994                                         if (!in.eof())  {       in >> junk;             }
995                                         else                    {       break;                  }
996                                 }
997                         }
998                         
999                         m->gobble(in);
1000                 }
1001                 in.close();
1002                 out.close();
1003                 
1004                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();  }
1005                 outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName);
1006                 
1007                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
1008
1009                 
1010                 return 0;
1011                 
1012         }
1013         catch(exception& e) {
1014                 m->errorOut(e, "RemoveSeqsCommand", "readAlign");
1015                 exit(1);
1016         }
1017 }
1018 //**********************************************************************************************************************
1019
1020