]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
added fastq to list.seqs, get.seqs and remove.seqs. fixed bug where venn command...
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> GetSeqsCommand::setParameters(){ 
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
19         CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
20         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
24                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy);
25                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
26                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none","qfile",false,false); parameters.push_back(pqfile);
27                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
28                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos2);
32
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "GetSeqsCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string GetSeqsCommand::getHelpString(){ 
44         try {
45                 string helpString = "";
46                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
47                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
48                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport, fastq and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
49                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=true. \n";
50                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
51                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
52                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
53                 return helpString;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
57                 exit(1);
58         }
59 }
60
61 //**********************************************************************************************************************
62 GetSeqsCommand::GetSeqsCommand(){       
63         try {
64                 abort = true; calledHelp = true;
65                 setParameters();
66                 vector<string> tempOutNames;
67                 outputTypes["fasta"] = tempOutNames;
68         outputTypes["fastq"] = tempOutNames;
69                 outputTypes["taxonomy"] = tempOutNames;
70                 outputTypes["name"] = tempOutNames;
71                 outputTypes["group"] = tempOutNames;
72                 outputTypes["alignreport"] = tempOutNames;
73                 outputTypes["list"] = tempOutNames;
74                 outputTypes["qfile"] = tempOutNames;
75         outputTypes["count"] = tempOutNames;
76                 outputTypes["accnosreport"] = tempOutNames;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 string GetSeqsCommand::getOutputPattern(string type) {
85     try {
86         string pattern = "";
87         
88         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
89         else if (type == "fastq")       {   pattern = "[filename],pick,[extension]";    }
90         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
91         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
92         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
93         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
94         else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
95         else if (type == "qfile")       {   pattern = "[filename],pick,[extension]";    }
96         else if (type == "accnosreport")      {   pattern = "[filename],pick.accnos.report";    }
97         else if (type == "alignreport")      {   pattern = "[filename],pick.align.report";    }
98         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
99         
100         return pattern;
101     }
102     catch(exception& e) {
103         m->errorOut(e, "GetSeqsCommand", "getOutputPattern");
104         exit(1);
105     }
106 }
107 //**********************************************************************************************************************
108 GetSeqsCommand::GetSeqsCommand(string option)  {
109         try {
110                 abort = false; calledHelp = false;   
111                                 
112                 //allow user to run help
113                 if(option == "help") { help(); abort = true; calledHelp = true; }
114                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
115                 
116                 else {
117                         vector<string> myArray = setParameters();
118                         
119                         OptionParser parser(option);
120                         map<string,string> parameters = parser.getParameters();
121                         
122                         ValidParameters validParameter;
123                         map<string,string>::iterator it;
124                         
125                         //check to make sure all parameters are valid for command
126                         for (it = parameters.begin(); it != parameters.end(); it++) { 
127                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
128                         }
129                         
130                         //initialize outputTypes
131                         vector<string> tempOutNames;
132                         outputTypes["fasta"] = tempOutNames;
133             outputTypes["fastq"] = tempOutNames;
134                         outputTypes["taxonomy"] = tempOutNames;
135                         outputTypes["name"] = tempOutNames;
136                         outputTypes["group"] = tempOutNames;
137                         outputTypes["alignreport"] = tempOutNames;
138                         outputTypes["list"] = tempOutNames;
139                         outputTypes["qfile"] = tempOutNames;
140                         outputTypes["accnosreport"] = tempOutNames;
141             outputTypes["count"] = tempOutNames;
142                         
143                         //if the user changes the output directory command factory will send this info to us in the output parameter 
144                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
145                         
146                         //if the user changes the input directory command factory will send this info to us in the output parameter 
147                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
148                         if (inputDir == "not found"){   inputDir = "";          }
149                         else {
150                                 string path;
151                                 it = parameters.find("alignreport");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
157                                 }
158                                 
159                                 it = parameters.find("fasta");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
165                                 }
166                                 
167                                 it = parameters.find("accnos");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
173                                 }
174                                 
175                                 it = parameters.find("accnos2");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
181                                 }
182                                 
183                                 it = parameters.find("list");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
189                                 }
190                                 
191                                 it = parameters.find("name");
192                                 //user has given a template file
193                                 if(it != parameters.end()){ 
194                                         path = m->hasPath(it->second);
195                                         //if the user has not given a path then, add inputdir. else leave path alone.
196                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
197                                 }
198                                 
199                                 it = parameters.find("group");
200                                 //user has given a template file
201                                 if(it != parameters.end()){ 
202                                         path = m->hasPath(it->second);
203                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
205                                 }
206                                 
207                                 it = parameters.find("taxonomy");
208                                 //user has given a template file
209                                 if(it != parameters.end()){ 
210                                         path = m->hasPath(it->second);
211                                         //if the user has not given a path then, add inputdir. else leave path alone.
212                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
213                                 }
214                                 
215                                 it = parameters.find("qfile");
216                                 //user has given a template file
217                                 if(it != parameters.end()){ 
218                                         path = m->hasPath(it->second);
219                                         //if the user has not given a path then, add inputdir. else leave path alone.
220                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
221                                 }
222                 
223                 it = parameters.find("count");
224                                 //user has given a template file
225                                 if(it != parameters.end()){ 
226                                         path = m->hasPath(it->second);
227                                         //if the user has not given a path then, add inputdir. else leave path alone.
228                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
229                                 }
230                 
231                 it = parameters.find("fastq");
232                                 //user has given a template file
233                                 if(it != parameters.end()){
234                                         path = m->hasPath(it->second);
235                                         //if the user has not given a path then, add inputdir. else leave path alone.
236                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
237                                 }
238                         }
239
240                         
241                         //check for required parameters
242                         accnosfile = validParameter.validFile(parameters, "accnos", true);
243                         if (accnosfile == "not open") { abort = true; }
244                         else if (accnosfile == "not found") {  
245                                 accnosfile = m->getAccnosFile(); 
246                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
247                                 else { 
248                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
249                                         abort = true;
250                                 } 
251                         }else { m->setAccnosFile(accnosfile); } 
252                         
253                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
254                         
255                         fastafile = validParameter.validFile(parameters, "fasta", true);
256                         if (fastafile == "not open") { fastafile = ""; abort = true; }
257                         else if (fastafile == "not found") {  fastafile = "";  }
258                         else { m->setFastaFile(fastafile); }
259                         
260                         namefile = validParameter.validFile(parameters, "name", true);
261                         if (namefile == "not open") { namefile = ""; abort = true; }
262                         else if (namefile == "not found") {  namefile = "";  }  
263                         else { m->setNameFile(namefile); }
264                         
265                         groupfile = validParameter.validFile(parameters, "group", true);
266                         if (groupfile == "not open") { abort = true; }
267                         else if (groupfile == "not found") {  groupfile = "";  }        
268                         else { m->setGroupFile(groupfile); }
269                         
270                         alignfile = validParameter.validFile(parameters, "alignreport", true);
271                         if (alignfile == "not open") { abort = true; }
272                         else if (alignfile == "not found") {  alignfile = "";  }
273                         
274                         listfile = validParameter.validFile(parameters, "list", true);
275                         if (listfile == "not open") { abort = true; }
276                         else if (listfile == "not found") {  listfile = "";  }
277                         else { m->setListFile(listfile); }
278                         
279                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
280                         if (taxfile == "not open") { taxfile = ""; abort = true; }
281                         else if (taxfile == "not found") {  taxfile = "";  }
282                         else { m->setTaxonomyFile(taxfile); }
283                         
284                         qualfile = validParameter.validFile(parameters, "qfile", true);
285                         if (qualfile == "not open") { abort = true; }
286                         else if (qualfile == "not found") {  qualfile = "";  }
287                         else { m->setQualFile(qualfile); }
288             
289             fastqfile = validParameter.validFile(parameters, "fastq", true);
290                         if (fastqfile == "not open") { abort = true; }
291                         else if (fastqfile == "not found") {  fastqfile = "";  }
292                         
293                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
294                         if (accnosfile2 == "not open") { abort = true; }
295                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
296                         
297             countfile = validParameter.validFile(parameters, "count", true);
298             if (countfile == "not open") { countfile = ""; abort = true; }
299             else if (countfile == "not found") { countfile = "";  }     
300             else { m->setCountTableFile(countfile); }
301             
302             if ((namefile != "") && (countfile != "")) {
303                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
304             }
305             
306             if ((groupfile != "") && (countfile != "")) {
307                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
308             }
309
310                         
311                         string usedDups = "true";
312                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
313                         dups = m->isTrue(temp);
314                         
315                         if ((fastqfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality, fastq or listfile."); m->mothurOutEndLine(); abort = true; }
316             
317             if (countfile == "") {
318                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
319                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
320                     parser.getNameFile(files);
321                 }
322             }
323                 }
324
325         }
326         catch(exception& e) {
327                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
328                 exit(1);
329         }
330 }
331 //**********************************************************************************************************************
332
333 int GetSeqsCommand::execute(){
334         try {
335                 
336                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
337                 
338                 //get names you want to keep
339                 names = m->readAccnos(accnosfile);
340                 
341                 if (m->control_pressed) { return 0; }
342         
343         if (countfile != "") {
344             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
345                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
346             }
347         }
348                 
349                 //read through the correct file and output lines you want to keep
350                 if (namefile != "")                     {               readName();                     }
351                 if (fastafile != "")            {               readFasta();            }
352         if (fastqfile != "")            {               readFastq();            }
353                 if (groupfile != "")            {               readGroup();            }
354         if (countfile != "")            {               readCount();            }
355                 if (alignfile != "")            {               readAlign();            }
356                 if (listfile != "")                     {               readList();                     }
357                 if (taxfile != "")                      {               readTax();                      }
358                 if (qualfile != "")                     {               readQual();                     }
359                 if (accnosfile2 != "")          {               compareAccnos();        }
360         
361         if (m->debug) { runSanityCheck(); }
362                 
363                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
364                 
365                 
366                 if (outputNames.size() != 0) {
367                         m->mothurOutEndLine();
368                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
369                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
370                         m->mothurOutEndLine();
371                         
372                         //set fasta file as new current fastafile
373                         string current = "";
374                         itTypes = outputTypes.find("fasta");
375                         if (itTypes != outputTypes.end()) {
376                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
377                         }
378                         
379                         itTypes = outputTypes.find("name");
380                         if (itTypes != outputTypes.end()) {
381                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
382                         }
383                         
384                         itTypes = outputTypes.find("group");
385                         if (itTypes != outputTypes.end()) {
386                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
387                         }
388                         
389                         itTypes = outputTypes.find("list");
390                         if (itTypes != outputTypes.end()) {
391                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
392                         }
393                         
394                         itTypes = outputTypes.find("taxonomy");
395                         if (itTypes != outputTypes.end()) {
396                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
397                         }
398                         
399                         itTypes = outputTypes.find("qfile");
400                         if (itTypes != outputTypes.end()) {
401                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
402                         }
403                         
404             itTypes = outputTypes.find("count");
405                         if (itTypes != outputTypes.end()) {
406                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
407                         }
408                 }
409                 
410                 return 0;               
411         }
412
413         catch(exception& e) {
414                 m->errorOut(e, "GetSeqsCommand", "execute");
415                 exit(1);
416         }
417 }
418 //**********************************************************************************************************************
419 int GetSeqsCommand::readFastq(){
420         try {
421                 bool wroteSomething = false;
422                 int selectedCount = 0;
423         
424                 ifstream in;
425                 m->openInputFile(fastqfile, in);
426                 
427                 string thisOutputDir = outputDir;
428                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastqfile);  }
429                 map<string, string> variables;
430         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
431         variables["[extension]"] = m->getExtension(fastqfile);
432                 string outputFileName = getOutputFileName("fastq", variables);
433                 ofstream out;
434                 m->openOutputFile(outputFileName, out);
435
436                 
437                 while(!in.eof()){
438                         
439                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
440                         
441                         //read sequence name
442                         string input = m->getline(in); m->gobble(in);
443                         
444             string outputString = input + "\n";
445             
446                         if (input[0] == '@') {
447                 //get rest of lines
448                 outputString += m->getline(in) + "\n"; m->gobble(in);
449                 outputString += m->getline(in) + "\n"; m->gobble(in);
450                 outputString += m->getline(in) + "\n"; m->gobble(in);
451                 
452                 vector<string> splits = m->splitWhiteSpace(input);
453                 string name = splits[0];
454                 name = name.substr(1);
455                 m->checkName(name);
456                 
457                 if (names.count(name) != 0) {
458                                         wroteSomething = true;
459                     selectedCount++;
460                     out << outputString;
461                 }
462             }
463             
464                         m->gobble(in);
465                 }
466                 in.close();
467                 out.close();
468                 
469                 
470                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
471                 outputNames.push_back(outputFileName);  outputTypes["fastq"].push_back(outputFileName);
472                 
473                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
474                 
475                 return 0;
476         
477         }
478         catch(exception& e) {
479                 m->errorOut(e, "GetSeqsCommand", "readFastq");
480                 exit(1);
481         }
482 }
483
484 //**********************************************************************************************************************
485 int GetSeqsCommand::readFasta(){
486         try {
487                 string thisOutputDir = outputDir;
488                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
489                 map<string, string> variables; 
490         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
491         variables["[extension]"] = m->getExtension(fastafile);
492                 string outputFileName = getOutputFileName("fasta", variables);
493                 ofstream out;
494                 m->openOutputFile(outputFileName, out);
495                 
496                 
497                 ifstream in;
498                 m->openInputFile(fastafile, in);
499                 string name;
500                 
501                 bool wroteSomething = false;
502                 int selectedCount = 0;
503         
504         if (m->debug) { set<string> temp; sanity["fasta"] = temp; }
505                 
506                 while(!in.eof()){
507                 
508                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
509                         
510                         Sequence currSeq(in);
511                         name = currSeq.getName();
512             
513             if (!dups) {//adjust name if needed
514                 map<string, string>::iterator it = uniqueMap.find(name);
515                 if (it != uniqueMap.end()) { currSeq.setName(it->second); }
516             }
517                         
518             name = currSeq.getName();
519             
520                         if (name != "") {
521                                 //if this name is in the accnos file
522                                 if (names.count(name) != 0) {
523                                         wroteSomething = true;
524                                         
525                                         currSeq.printSequence(out);
526                                         selectedCount++;
527                     
528                     if (m->debug) { sanity["fasta"].insert(name); }
529                                 }
530                         }
531                         m->gobble(in);
532                 }
533                 in.close();     
534                 out.close();
535                 
536                 
537                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
538                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
539                 
540                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
541                 
542                 return 0;
543
544         }
545         catch(exception& e) {
546                 m->errorOut(e, "GetSeqsCommand", "readFasta");
547                 exit(1);
548         }
549 }
550 //**********************************************************************************************************************
551 int GetSeqsCommand::readQual(){
552         try {
553                 string thisOutputDir = outputDir;
554                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
555                 map<string, string> variables; 
556         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(qualfile));
557         variables["[extension]"] = m->getExtension(qualfile);
558                 string outputFileName = getOutputFileName("qfile", variables);
559                 ofstream out;
560                 m->openOutputFile(outputFileName, out);
561                 
562                 
563                 ifstream in;
564                 m->openInputFile(qualfile, in);
565                 string name;
566                 
567                 bool wroteSomething = false;
568                 int selectedCount = 0;
569                 
570         if (m->debug) { set<string> temp; sanity["qual"] = temp; }
571                 
572                 while(!in.eof()){       
573                         string saveName = "";
574                         string name = "";
575                         string scores = "";
576                         
577                         in >> name;
578             
579             if (!dups) {//adjust name if needed
580                 map<string, string>::iterator it = uniqueMap.find(name);
581                 if (it != uniqueMap.end()) { name = it->second; }
582             }
583                                 
584                         if (name.length() != 0) { 
585                                 saveName = name.substr(1);
586                                 while (!in.eof())       {       
587                                         char c = in.get(); 
588                                         if (c == 10 || c == 13 || c == -1){     break;  }
589                                         else { name += c; }     
590                                 } 
591                                 m->gobble(in);
592                         }
593                         
594                         while(in){
595                                 char letter= in.get();
596                                 if(letter == '>'){      in.putback(letter);     break;  }
597                                 else{ scores += letter; }
598                         }
599                         
600                         m->gobble(in);
601                         
602                         if (names.count(saveName) != 0) {
603                                 wroteSomething = true;
604                                                 
605                                 out << name << endl << scores;
606                                 selectedCount++;
607                 if (m->debug) { sanity["qual"].insert(name); }
608                         }
609                         
610                         m->gobble(in);
611                 }
612                 in.close();
613                 out.close();
614                 
615                 
616                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
617                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
618                 
619                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your quality file."); m->mothurOutEndLine();
620
621                 
622                 return 0;
623                 
624         }
625         catch(exception& e) {
626                 m->errorOut(e, "GetSeqsCommand", "readQual");
627                 exit(1);
628         }
629 }
630 //**********************************************************************************************************************
631 int GetSeqsCommand::readCount(){
632         try {
633                 string thisOutputDir = outputDir;
634                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
635                 map<string, string> variables; 
636                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
637         variables["[extension]"] = m->getExtension(countfile);
638                 string outputFileName = getOutputFileName("count", variables);
639                 
640                 ofstream out;
641                 m->openOutputFile(outputFileName, out);
642                 
643                 ifstream in;
644                 m->openInputFile(countfile, in);
645                 
646                 bool wroteSomething = false;
647                 int selectedCount = 0;
648                 
649         string headers = m->getline(in); m->gobble(in);
650         out << headers << endl;
651         
652         string name, rest; int thisTotal;
653         while (!in.eof()) {
654             
655             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
656             
657             in >> name; m->gobble(in); 
658             in >> thisTotal; m->gobble(in);
659             rest = m->getline(in); m->gobble(in);
660             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
661             
662             if (names.count(name) != 0) {
663                 out << name << '\t' << thisTotal << '\t' << rest << endl;
664                 wroteSomething = true;
665                 selectedCount+= thisTotal;
666             }
667         }
668         in.close();
669                 out.close();
670         
671         //check for groups that have been eliminated
672         CountTable ct;
673         if (ct.testGroups(outputFileName)) {
674             ct.readTable(outputFileName, true, false);
675             ct.printTable(outputFileName);
676         }
677                 
678                 if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
679                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
680                 
681                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
682         
683                 return 0;
684         }
685         catch(exception& e) {
686                 m->errorOut(e, "GetSeqsCommand", "readCount");
687                 exit(1);
688         }
689 }
690
691 //**********************************************************************************************************************
692 int GetSeqsCommand::readList(){
693         try {
694                 string thisOutputDir = outputDir;
695                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
696         map<string, string> variables; 
697                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
698         variables["[extension]"] = m->getExtension(listfile);
699                 string outputFileName = getOutputFileName("list", variables);
700                 ofstream out;
701                 m->openOutputFile(outputFileName, out);
702                 
703                 ifstream in;
704                 m->openInputFile(listfile, in);
705                 
706                 bool wroteSomething = false;
707                 int selectedCount = 0;
708         
709         if (m->debug) { set<string> temp; sanity["list"] = temp; }
710                 
711                 while(!in.eof()){
712                         
713                         selectedCount = 0;
714                         
715                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
716
717                         //read in list vector
718                         ListVector list(in);
719                         
720                         //make a new list vector
721                         ListVector newList;
722                         newList.setLabel(list.getLabel());
723                         
724                         //for each bin
725                         for (int i = 0; i < list.getNumBins(); i++) {
726                         
727                                 //parse out names that are in accnos file
728                                 string binnames = list.get(i);
729                 vector<string> bnames;
730                 m->splitAtComma(binnames, bnames);
731                                 
732                                 string newNames = "";
733                 for (int i = 0; i < bnames.size(); i++) {
734                                         string name = bnames[i];
735                                         //if that name is in the .accnos file, add it
736                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
737                                 }
738                         
739                                 //if there are names in this bin add to new list
740                                 if (newNames != "") { 
741                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
742                                         newList.push_back(newNames);    
743                                 }
744                         }
745                                 
746                         //print new listvector
747                         if (newList.getNumBins() != 0) {
748                                 wroteSomething = true;
749                                 newList.print(out);
750                         }
751                         
752                         m->gobble(in);
753                 }
754                 in.close();     
755                 out.close();
756                 
757                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
758                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
759                 
760                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
761                 
762                 return 0;
763
764         }
765         catch(exception& e) {
766                 m->errorOut(e, "GetSeqsCommand", "readList");
767                 exit(1);
768         }
769 }
770 //**********************************************************************************************************************
771 int GetSeqsCommand::readName(){
772         try {
773                 string thisOutputDir = outputDir;
774                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
775         map<string, string> variables; 
776                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
777         variables["[extension]"] = m->getExtension(namefile);
778                 string outputFileName = getOutputFileName("name", variables);
779                 ofstream out;
780                 m->openOutputFile(outputFileName, out);
781                 
782
783                 ifstream in;
784                 m->openInputFile(namefile, in);
785                 string name, firstCol, secondCol;
786                 
787                 bool wroteSomething = false;
788                 int selectedCount = 0;
789         
790         if (m->debug) { set<string> temp; sanity["name"] = temp; }
791         if (m->debug) { set<string> temp; sanity["dupname"] = temp; }
792                 
793                 while(!in.eof()){
794                 
795                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
796
797                         in >> firstCol;                 m->gobble(in);
798                         in >> secondCol;
799                         
800                         string hold = "";
801                         if (dups) { hold = secondCol; }
802                         
803                         vector<string> parsedNames;
804                         m->splitAtComma(secondCol, parsedNames);
805                         
806                         vector<string> validSecond;
807                         for (int i = 0; i < parsedNames.size(); i++) {
808                                 if (names.count(parsedNames[i]) != 0) {
809                                         validSecond.push_back(parsedNames[i]);
810                     if (m->debug) { sanity["dupname"].insert(parsedNames[i]); }
811                                 }
812                         }
813
814                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
815                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]); if (m->debug) { sanity["dupname"].insert(parsedNames[i]); } }
816                                 out << firstCol << '\t' << hold << endl;
817                                 wroteSomething = true;
818                                 selectedCount += parsedNames.size();
819                 if (m->debug) { sanity["name"].insert(firstCol); }
820                         }else {
821                 
822                                 selectedCount += validSecond.size();
823                                 
824                                 //if the name in the first column is in the set then print it and any other names in second column also in set
825                                 if (names.count(firstCol) != 0) {
826                                 
827                                         wroteSomething = true;
828                                         
829                                         out << firstCol << '\t';
830                                         
831                                         //you know you have at least one valid second since first column is valid
832                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
833                                         out << validSecond[validSecond.size()-1] << endl;
834                     
835                     if (m->debug) { sanity["name"].insert(firstCol); }
836                                         
837                                 
838                                 //make first name in set you come to first column and then add the remaining names to second column
839                                 }else {
840                     
841                                         //you want part of this row
842                                         if (validSecond.size() != 0) {
843                                         
844                                                 wroteSomething = true;
845                                                 
846                                                 out << validSecond[0] << '\t';
847                         //we are changing the unique name in the fasta file
848                         uniqueMap[firstCol] = validSecond[0];
849                                         
850                                                 //you know you have at least one valid second since first column is valid
851                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
852                                                 out << validSecond[validSecond.size()-1] << endl;
853                         
854                         if (m->debug) { sanity["name"].insert(validSecond[0]); }
855                                         }
856                                 }
857                         }
858                         m->gobble(in);
859                 }
860                 in.close();
861                 out.close();
862                 
863                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
864                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
865                 
866                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
867                 
868                 return 0;
869                 
870         }
871         catch(exception& e) {
872                 m->errorOut(e, "GetSeqsCommand", "readName");
873                 exit(1);
874         }
875 }
876
877 //**********************************************************************************************************************
878 int GetSeqsCommand::readGroup(){
879         try {
880                 string thisOutputDir = outputDir;
881                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
882                 map<string, string> variables; 
883                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
884         variables["[extension]"] = m->getExtension(groupfile);
885                 string outputFileName = getOutputFileName("group", variables);
886                 ofstream out;
887                 m->openOutputFile(outputFileName, out);
888                 
889
890                 ifstream in;
891                 m->openInputFile(groupfile, in);
892                 string name, group;
893                 
894                 bool wroteSomething = false;
895                 int selectedCount = 0;
896         
897         if (m->debug) { set<string> temp; sanity["group"] = temp; }
898                 
899                 while(!in.eof()){
900
901                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
902
903
904                         in >> name;                             //read from first column
905                         in >> group;                    //read from second column
906             
907                         
908                         //if this name is in the accnos file
909                         if (names.count(name) != 0) {
910                                 wroteSomething = true;
911                                 
912                                 out << name << '\t' << group << endl;
913                                 selectedCount++;
914                 
915                 if (m->debug) {  sanity["group"].insert(name); }
916                         }
917                                         
918                         m->gobble(in);
919                 }
920                 in.close();
921                 out.close();
922                 
923                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
924                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
925                 
926                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
927
928                 
929                 return 0;
930
931         }
932         catch(exception& e) {
933                 m->errorOut(e, "GetSeqsCommand", "readGroup");
934                 exit(1);
935         }
936 }
937 //**********************************************************************************************************************
938 int GetSeqsCommand::readTax(){
939         try {
940                 string thisOutputDir = outputDir;
941                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
942                 map<string, string> variables; 
943                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
944         variables["[extension]"] = m->getExtension(taxfile);
945                 string outputFileName = getOutputFileName("taxonomy", variables);
946                 ofstream out;
947                 m->openOutputFile(outputFileName, out);
948                 
949                 ifstream in;
950                 m->openInputFile(taxfile, in);
951                 string name, tax;
952                 
953                 bool wroteSomething = false;
954                 int selectedCount = 0;
955         
956         if (m->debug) { set<string> temp; sanity["tax"] = temp; }
957                 
958                 while(!in.eof()){
959
960                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
961
962                         in >> name;                             //read from first column
963                         in >> tax;                      //read from second column
964             
965             if (!dups) {//adjust name if needed
966                 map<string, string>::iterator it = uniqueMap.find(name);
967                 if (it != uniqueMap.end()) { name = it->second; }
968             }
969                         
970                         //if this name is in the accnos file
971                         if (names.count(name) != 0) {
972                                 wroteSomething = true;
973                                 
974                                 out << name << '\t' << tax << endl;
975                                 selectedCount++;
976                 
977                 if (m->debug) { sanity["tax"].insert(name); }
978                         }
979                                         
980                         m->gobble(in);
981                 }
982                 in.close();
983                 out.close();
984                 
985                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
986                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
987                 
988                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
989                         
990                 return 0;
991
992         }
993         catch(exception& e) {
994                 m->errorOut(e, "GetSeqsCommand", "readTax");
995                 exit(1);
996         }
997 }
998 //**********************************************************************************************************************
999 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
1000 int GetSeqsCommand::readAlign(){
1001         try {
1002                 string thisOutputDir = outputDir;
1003                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
1004         map<string, string> variables; 
1005                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
1006                 string outputFileName = getOutputFileName("alignreport", variables);
1007                 ofstream out;
1008                 m->openOutputFile(outputFileName, out);
1009                 
1010
1011                 ifstream in;
1012                 m->openInputFile(alignfile, in);
1013                 string name, junk;
1014                 
1015                 bool wroteSomething = false;
1016                 int selectedCount = 0;
1017                 
1018                 //read column headers
1019                 for (int i = 0; i < 16; i++) {  
1020                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1021                         else                    {       break;                  }
1022                 }
1023                 out << endl;
1024                 
1025                 while(!in.eof()){
1026                 
1027                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1028
1029
1030                         in >> name;                             //read from first column
1031             
1032             if (!dups) {//adjust name if needed
1033                 map<string, string>::iterator it = uniqueMap.find(name);
1034                 if (it != uniqueMap.end()) { name = it->second; }
1035             }
1036                         
1037                         //if this name is in the accnos file
1038                         if (names.count(name) != 0) {
1039                                 wroteSomething = true;
1040                                 selectedCount++;
1041                                 
1042                                 out << name << '\t';
1043                                 
1044                                 //read rest
1045                                 for (int i = 0; i < 15; i++) {  
1046                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1047                                         else                    {       break;                  }
1048                                 }
1049                                 out << endl;
1050                                 
1051                         }else {//still read just don't do anything with it
1052                                 //read rest
1053                                 for (int i = 0; i < 15; i++) {  
1054                                         if (!in.eof())  {       in >> junk;             }
1055                                         else                    {       break;                  }
1056                                 }
1057                         }
1058                         
1059                         m->gobble(in);
1060                 }
1061                 in.close();
1062                 out.close();
1063                 
1064                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
1065                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
1066                 
1067                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
1068                 
1069                 return 0;
1070                 
1071         }
1072         catch(exception& e) {
1073                 m->errorOut(e, "GetSeqsCommand", "readAlign");
1074                 exit(1);
1075         }
1076 }
1077 //**********************************************************************************************************************
1078 //just looking at common mistakes. 
1079 int GetSeqsCommand::runSanityCheck(){
1080         try {
1081         string thisOutputDir = outputDir;
1082         if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
1083         string filename = outputDir + "get.seqs.debug.report";
1084         
1085         ofstream out;
1086                 m->openOutputFile(filename, out); 
1087
1088        
1089         //compare fasta, name, qual and taxonomy if given to make sure they contain the same seqs
1090         if (fastafile != "") {
1091             if (namefile != "") { //compare with fasta
1092                 if (sanity["fasta"] != sanity["name"]) { //create mismatch file
1093                     createMisMatchFile(out, fastafile, namefile, sanity["fasta"], sanity["name"]);
1094                 }
1095             }
1096             if (qualfile != "") {
1097                 if (sanity["fasta"] != sanity["qual"]) { //create mismatch file
1098                     createMisMatchFile(out, fastafile, qualfile, sanity["fasta"], sanity["qual"]);
1099                 }
1100             }
1101             if (taxfile != "") {
1102                 if (sanity["fasta"] != sanity["tax"]) { //create mismatch file
1103                     createMisMatchFile(out, fastafile, taxfile, sanity["fasta"], sanity["tax"]);
1104                 }
1105             }
1106         }
1107         
1108         //compare dupnames, groups and list if given to make sure they match
1109         if (namefile != "") {
1110             if (groupfile != "") {
1111                 if (sanity["dupname"] != sanity["group"]) { //create mismatch file
1112                     createMisMatchFile(out, namefile, groupfile, sanity["dupname"], sanity["group"]);
1113                 } 
1114             }
1115             if (listfile != "") {
1116                 if (sanity["dupname"] != sanity["list"]) { //create mismatch file
1117                     createMisMatchFile(out, namefile, listfile, sanity["dupname"], sanity["list"]);
1118                 } 
1119             }
1120         }else{
1121
1122             if ((groupfile != "") && (fastafile != "")) {
1123                 if (sanity["fasta"] != sanity["group"]) { //create mismatch file
1124                     createMisMatchFile(out, fastafile, groupfile, sanity["fasta"], sanity["group"]);
1125                 } 
1126             }
1127         }
1128         
1129         out.close();
1130         
1131         if (m->isBlank(filename)) { m->mothurRemove(filename); }
1132         else { m->mothurOut("\n[DEBUG]: " + filename + " contains the file mismatches.\n");outputNames.push_back(filename); outputTypes["debug"].push_back(filename); }
1133         
1134         return 0;
1135     }
1136         catch(exception& e) {
1137                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1138                 exit(1);
1139         }
1140 }
1141 //**********************************************************************************************************************
1142 //just looking at common mistakes. 
1143 int GetSeqsCommand::createMisMatchFile(ofstream& out, string filename1, string filename2, set<string> set1, set<string> set2){
1144         try {
1145         out << "****************************************" << endl << endl;
1146         out << "Names unique to " << filename1 << ":\n";
1147         
1148         //remove names in set1 that are also in set2
1149         for (set<string>::iterator it = set1.begin(); it != set1.end();) {
1150             string name = *it;
1151             
1152             if (set2.count(name) == 0)  { out << name << endl;  } //name unique to set1
1153             else                        { set2.erase(name);     } //you are in both so erase 
1154             set1.erase(it++);
1155         }
1156         
1157         out << "\nNames unique to " << filename2 << ":\n";
1158         //output results
1159         for (set<string>::iterator it = set2.begin(); it != set2.end(); it++) {  out << *it << endl;  }
1160         
1161         out << "****************************************" << endl << endl;
1162         
1163         return 0;
1164     }
1165         catch(exception& e) {
1166                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1167                 exit(1);
1168         }
1169 }
1170 //**********************************************************************************************************************
1171
1172 int GetSeqsCommand::compareAccnos(){
1173         try {
1174                 
1175                 string thisOutputDir = outputDir;
1176                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
1177         map<string, string> variables; 
1178                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile));
1179                 string outputFileName = getOutputFileName("accnosreport", variables);
1180                 
1181                 ofstream out;
1182                 m->openOutputFile(outputFileName, out);
1183                 
1184                 ifstream in;
1185                 m->openInputFile(accnosfile2, in);
1186                 string name;
1187                 
1188                 set<string> namesAccnos2;
1189                 set<string> namesDups;
1190                 set<string> namesAccnos = names;
1191                 
1192                 map<string, int> nameCount;
1193                 
1194                 if (namefile != "") {
1195                         ifstream inName;
1196                         m->openInputFile(namefile, inName);
1197                         
1198                         
1199                         while(!inName.eof()){
1200                                 
1201                                 if (m->control_pressed) { inName.close(); return 0; }
1202                                 
1203                                 string thisname, repnames;
1204                                 
1205                                 inName >> thisname;             m->gobble(inName);              //read from first column
1206                                 inName >> repnames;                     //read from second column
1207                                 
1208                                 int num = m->getNumNames(repnames);
1209                                 nameCount[thisname] = num;
1210                                 
1211                                 m->gobble(inName);
1212                         }
1213                         inName.close(); 
1214                 }
1215                 
1216                 while(!in.eof()){
1217                         in >> name;
1218                         
1219                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
1220                                 int pos = name.find_last_of('_');
1221                                 string tempName = name;
1222                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
1223                                 if (namesAccnos.count(tempName) == 0){
1224                                         namesAccnos2.insert(name);
1225                                 }else { //you are in both so erase
1226                                         namesAccnos.erase(name);
1227                                         namesDups.insert(name);
1228                                 }
1229                         }else { //you are in both so erase
1230                                 namesAccnos.erase(name);
1231                                 namesDups.insert(name);
1232                         }
1233                         
1234                         m->gobble(in);
1235                 }
1236                 in.close();     
1237                 
1238                 out << "Names in both files : " + toString(namesDups.size()) << endl;
1239                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
1240                 
1241                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
1242                         out << (*it);
1243                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1244                         out << endl;
1245                 }
1246                 
1247                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
1248                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
1249                 
1250                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
1251                         out << (*it);
1252                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1253                         out << endl;
1254                 }
1255                 
1256                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
1257                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
1258                 
1259                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
1260                         out << (*it);
1261                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1262                         out << endl;
1263                 }
1264
1265                 out.close(); 
1266                 
1267                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
1268                 
1269                 return 0;
1270                 
1271         }
1272         catch(exception& e) {
1273                 m->errorOut(e, "GetSeqsCommand", "compareAccnos");
1274                 exit(1);
1275         }
1276 }
1277
1278
1279 //**********************************************************************************************************************
1280