]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
fixes while testing 1.33.0
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> GetSeqsCommand::setParameters(){ 
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
19         CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
20         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
24                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy);
25                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
26                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none","qfile",false,false); parameters.push_back(pqfile);
27                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
28                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos2);
32
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "GetSeqsCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string GetSeqsCommand::getHelpString(){ 
44         try {
45                 string helpString = "";
46                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
47                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
48                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport, fastq and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
49                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=true. \n";
50                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
51                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
52                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
53                 return helpString;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
57                 exit(1);
58         }
59 }
60
61 //**********************************************************************************************************************
62 GetSeqsCommand::GetSeqsCommand(){       
63         try {
64                 abort = true; calledHelp = true;
65                 setParameters();
66                 vector<string> tempOutNames;
67                 outputTypes["fasta"] = tempOutNames;
68         outputTypes["fastq"] = tempOutNames;
69                 outputTypes["taxonomy"] = tempOutNames;
70                 outputTypes["name"] = tempOutNames;
71                 outputTypes["group"] = tempOutNames;
72                 outputTypes["alignreport"] = tempOutNames;
73                 outputTypes["list"] = tempOutNames;
74                 outputTypes["qfile"] = tempOutNames;
75         outputTypes["count"] = tempOutNames;
76                 outputTypes["accnosreport"] = tempOutNames;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 string GetSeqsCommand::getOutputPattern(string type) {
85     try {
86         string pattern = "";
87         
88         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
89         else if (type == "fastq")       {   pattern = "[filename],pick,[extension]";    }
90         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
91         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
92         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
93         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
94         else if (type == "list")        {   pattern = "[filename],[distance],pick,[extension]";    }
95         else if (type == "qfile")       {   pattern = "[filename],pick,[extension]";    }
96         else if (type == "accnosreport")      {   pattern = "[filename],pick.accnos.report";    }
97         else if (type == "alignreport")      {   pattern = "[filename],pick.align.report";    }
98         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
99         
100         return pattern;
101     }
102     catch(exception& e) {
103         m->errorOut(e, "GetSeqsCommand", "getOutputPattern");
104         exit(1);
105     }
106 }
107 //**********************************************************************************************************************
108 GetSeqsCommand::GetSeqsCommand(string option)  {
109         try {
110                 abort = false; calledHelp = false;   
111                                 
112                 //allow user to run help
113                 if(option == "help") { help(); abort = true; calledHelp = true; }
114                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
115                 
116                 else {
117                         vector<string> myArray = setParameters();
118                         
119                         OptionParser parser(option);
120                         map<string,string> parameters = parser.getParameters();
121                         
122                         ValidParameters validParameter;
123                         map<string,string>::iterator it;
124                         
125                         //check to make sure all parameters are valid for command
126                         for (it = parameters.begin(); it != parameters.end(); it++) { 
127                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
128                         }
129                         
130                         //initialize outputTypes
131                         vector<string> tempOutNames;
132                         outputTypes["fasta"] = tempOutNames;
133             outputTypes["fastq"] = tempOutNames;
134                         outputTypes["taxonomy"] = tempOutNames;
135                         outputTypes["name"] = tempOutNames;
136                         outputTypes["group"] = tempOutNames;
137                         outputTypes["alignreport"] = tempOutNames;
138                         outputTypes["list"] = tempOutNames;
139                         outputTypes["qfile"] = tempOutNames;
140                         outputTypes["accnosreport"] = tempOutNames;
141             outputTypes["count"] = tempOutNames;
142                         
143                         //if the user changes the output directory command factory will send this info to us in the output parameter 
144                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
145                         
146                         //if the user changes the input directory command factory will send this info to us in the output parameter 
147                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
148                         if (inputDir == "not found"){   inputDir = "";          }
149                         else {
150                                 string path;
151                                 it = parameters.find("alignreport");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
157                                 }
158                                 
159                                 it = parameters.find("fasta");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
165                                 }
166                                 
167                                 it = parameters.find("accnos");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
173                                 }
174                                 
175                                 it = parameters.find("accnos2");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
181                                 }
182                                 
183                                 it = parameters.find("list");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
189                                 }
190                                 
191                                 it = parameters.find("name");
192                                 //user has given a template file
193                                 if(it != parameters.end()){ 
194                                         path = m->hasPath(it->second);
195                                         //if the user has not given a path then, add inputdir. else leave path alone.
196                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
197                                 }
198                                 
199                                 it = parameters.find("group");
200                                 //user has given a template file
201                                 if(it != parameters.end()){ 
202                                         path = m->hasPath(it->second);
203                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
205                                 }
206                                 
207                                 it = parameters.find("taxonomy");
208                                 //user has given a template file
209                                 if(it != parameters.end()){ 
210                                         path = m->hasPath(it->second);
211                                         //if the user has not given a path then, add inputdir. else leave path alone.
212                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
213                                 }
214                                 
215                                 it = parameters.find("qfile");
216                                 //user has given a template file
217                                 if(it != parameters.end()){ 
218                                         path = m->hasPath(it->second);
219                                         //if the user has not given a path then, add inputdir. else leave path alone.
220                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
221                                 }
222                 
223                 it = parameters.find("count");
224                                 //user has given a template file
225                                 if(it != parameters.end()){ 
226                                         path = m->hasPath(it->second);
227                                         //if the user has not given a path then, add inputdir. else leave path alone.
228                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
229                                 }
230                 
231                 it = parameters.find("fastq");
232                                 //user has given a template file
233                                 if(it != parameters.end()){
234                                         path = m->hasPath(it->second);
235                                         //if the user has not given a path then, add inputdir. else leave path alone.
236                                         if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
237                                 }
238                         }
239
240                         
241                         //check for required parameters
242                         accnosfile = validParameter.validFile(parameters, "accnos", true);
243                         if (accnosfile == "not open") { abort = true; }
244                         else if (accnosfile == "not found") {  
245                                 accnosfile = m->getAccnosFile(); 
246                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
247                                 else { 
248                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
249                                         abort = true;
250                                 } 
251                         }else { m->setAccnosFile(accnosfile); } 
252                         
253                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
254                         
255                         fastafile = validParameter.validFile(parameters, "fasta", true);
256                         if (fastafile == "not open") { fastafile = ""; abort = true; }
257                         else if (fastafile == "not found") {  fastafile = "";  }
258                         else { m->setFastaFile(fastafile); }
259                         
260                         namefile = validParameter.validFile(parameters, "name", true);
261                         if (namefile == "not open") { namefile = ""; abort = true; }
262                         else if (namefile == "not found") {  namefile = "";  }  
263                         else { m->setNameFile(namefile); }
264                         
265                         groupfile = validParameter.validFile(parameters, "group", true);
266                         if (groupfile == "not open") { abort = true; }
267                         else if (groupfile == "not found") {  groupfile = "";  }        
268                         else { m->setGroupFile(groupfile); }
269                         
270                         alignfile = validParameter.validFile(parameters, "alignreport", true);
271                         if (alignfile == "not open") { abort = true; }
272                         else if (alignfile == "not found") {  alignfile = "";  }
273                         
274                         listfile = validParameter.validFile(parameters, "list", true);
275                         if (listfile == "not open") { abort = true; }
276                         else if (listfile == "not found") {  listfile = "";  }
277                         else { m->setListFile(listfile); }
278                         
279                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
280                         if (taxfile == "not open") { taxfile = ""; abort = true; }
281                         else if (taxfile == "not found") {  taxfile = "";  }
282                         else { m->setTaxonomyFile(taxfile); }
283                         
284                         qualfile = validParameter.validFile(parameters, "qfile", true);
285                         if (qualfile == "not open") { abort = true; }
286                         else if (qualfile == "not found") {  qualfile = "";  }
287                         else { m->setQualFile(qualfile); }
288             
289             fastqfile = validParameter.validFile(parameters, "fastq", true);
290                         if (fastqfile == "not open") { abort = true; }
291                         else if (fastqfile == "not found") {  fastqfile = "";  }
292                         
293                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
294                         if (accnosfile2 == "not open") { abort = true; }
295                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
296                         
297             countfile = validParameter.validFile(parameters, "count", true);
298             if (countfile == "not open") { countfile = ""; abort = true; }
299             else if (countfile == "not found") { countfile = "";  }     
300             else { m->setCountTableFile(countfile); }
301             
302             if ((namefile != "") && (countfile != "")) {
303                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
304             }
305             
306             if ((groupfile != "") && (countfile != "")) {
307                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
308             }
309
310                         
311                         string usedDups = "true";
312                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
313                         dups = m->isTrue(temp);
314                         
315                         if ((fastqfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality, fastq or listfile."); m->mothurOutEndLine(); abort = true; }
316             
317             if (countfile == "") {
318                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
319                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
320                     parser.getNameFile(files);
321                 }
322             }
323                 }
324
325         }
326         catch(exception& e) {
327                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
328                 exit(1);
329         }
330 }
331 //**********************************************************************************************************************
332
333 int GetSeqsCommand::execute(){
334         try {
335                 
336                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
337                 
338                 //get names you want to keep
339                 names = m->readAccnos(accnosfile);
340                 
341                 if (m->control_pressed) { return 0; }
342         
343         if (countfile != "") {
344             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
345                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
346             }
347         }
348                 
349                 //read through the correct file and output lines you want to keep
350                 if (namefile != "")                     {               readName();                     }
351                 if (fastafile != "")            {               readFasta();            }
352         if (fastqfile != "")            {               readFastq();            }
353                 if (groupfile != "")            {               readGroup();            }
354         if (countfile != "")            {               readCount();            }
355                 if (alignfile != "")            {               readAlign();            }
356                 if (listfile != "")                     {               readList();                     }
357                 if (taxfile != "")                      {               readTax();                      }
358                 if (qualfile != "")                     {               readQual();                     }
359                 if (accnosfile2 != "")          {               compareAccnos();        }
360         
361         if (m->debug) { runSanityCheck(); }
362                 
363                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
364                 
365                 
366                 if (outputNames.size() != 0) {
367                         m->mothurOutEndLine();
368                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
369                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
370                         m->mothurOutEndLine();
371                         
372                         //set fasta file as new current fastafile
373                         string current = "";
374                         itTypes = outputTypes.find("fasta");
375                         if (itTypes != outputTypes.end()) {
376                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
377                         }
378                         
379                         itTypes = outputTypes.find("name");
380                         if (itTypes != outputTypes.end()) {
381                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
382                         }
383                         
384                         itTypes = outputTypes.find("group");
385                         if (itTypes != outputTypes.end()) {
386                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
387                         }
388                         
389                         itTypes = outputTypes.find("list");
390                         if (itTypes != outputTypes.end()) {
391                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
392                         }
393                         
394                         itTypes = outputTypes.find("taxonomy");
395                         if (itTypes != outputTypes.end()) {
396                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
397                         }
398                         
399                         itTypes = outputTypes.find("qfile");
400                         if (itTypes != outputTypes.end()) {
401                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
402                         }
403                         
404             itTypes = outputTypes.find("count");
405                         if (itTypes != outputTypes.end()) {
406                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
407                         }
408                 }
409                 
410                 return 0;               
411         }
412
413         catch(exception& e) {
414                 m->errorOut(e, "GetSeqsCommand", "execute");
415                 exit(1);
416         }
417 }
418 //**********************************************************************************************************************
419 int GetSeqsCommand::readFastq(){
420         try {
421                 bool wroteSomething = false;
422                 int selectedCount = 0;
423         
424                 ifstream in;
425                 m->openInputFile(fastqfile, in);
426                 
427                 string thisOutputDir = outputDir;
428                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastqfile);  }
429                 map<string, string> variables;
430         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
431         variables["[extension]"] = m->getExtension(fastqfile);
432                 string outputFileName = getOutputFileName("fastq", variables);
433                 ofstream out;
434                 m->openOutputFile(outputFileName, out);
435
436                 
437                 while(!in.eof()){
438                         
439                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
440                         
441                         //read sequence name
442                         string input = m->getline(in); m->gobble(in);
443                         
444             string outputString = input + "\n";
445             
446                         if (input[0] == '@') {
447                 //get rest of lines
448                 outputString += m->getline(in) + "\n"; m->gobble(in);
449                 outputString += m->getline(in) + "\n"; m->gobble(in);
450                 outputString += m->getline(in) + "\n"; m->gobble(in);
451                 
452                 vector<string> splits = m->splitWhiteSpace(input);
453                 string name = splits[0];
454                 name = name.substr(1);
455                 m->checkName(name);
456                 
457                 if (names.count(name) != 0) {
458                                         wroteSomething = true;
459                     selectedCount++;
460                     out << outputString;
461                 }
462             }
463             
464                         m->gobble(in);
465                 }
466                 in.close();
467                 out.close();
468                 
469                 
470                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
471                 outputNames.push_back(outputFileName);  outputTypes["fastq"].push_back(outputFileName);
472                 
473                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
474                 
475                 return 0;
476         
477         }
478         catch(exception& e) {
479                 m->errorOut(e, "GetSeqsCommand", "readFastq");
480                 exit(1);
481         }
482 }
483
484 //**********************************************************************************************************************
485 int GetSeqsCommand::readFasta(){
486         try {
487                 string thisOutputDir = outputDir;
488                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
489                 map<string, string> variables; 
490         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
491         variables["[extension]"] = m->getExtension(fastafile);
492                 string outputFileName = getOutputFileName("fasta", variables);
493                 ofstream out;
494                 m->openOutputFile(outputFileName, out);
495                 
496                 
497                 ifstream in;
498                 m->openInputFile(fastafile, in);
499                 string name;
500                 
501                 bool wroteSomething = false;
502                 int selectedCount = 0;
503         
504         if (m->debug) { set<string> temp; sanity["fasta"] = temp; }
505                 
506                 while(!in.eof()){
507                 
508                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
509                         
510                         Sequence currSeq(in);
511                         name = currSeq.getName();
512             
513             if (!dups) {//adjust name if needed
514                 map<string, string>::iterator it = uniqueMap.find(name);
515                 if (it != uniqueMap.end()) { currSeq.setName(it->second); }
516             }
517                         
518             name = currSeq.getName();
519             
520                         if (name != "") {
521                                 //if this name is in the accnos file
522                                 if (names.count(name) != 0) {
523                                         wroteSomething = true;
524                                         
525                                         currSeq.printSequence(out);
526                                         selectedCount++;
527                     
528                     if (m->debug) { sanity["fasta"].insert(name); }
529                                 }
530                         }
531                         m->gobble(in);
532                 }
533                 in.close();     
534                 out.close();
535                 
536                 
537                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
538                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
539                 
540                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
541                 
542                 return 0;
543
544         }
545         catch(exception& e) {
546                 m->errorOut(e, "GetSeqsCommand", "readFasta");
547                 exit(1);
548         }
549 }
550 //**********************************************************************************************************************
551 int GetSeqsCommand::readQual(){
552         try {
553                 string thisOutputDir = outputDir;
554                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
555                 map<string, string> variables; 
556         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(qualfile));
557         variables["[extension]"] = m->getExtension(qualfile);
558                 string outputFileName = getOutputFileName("qfile", variables);
559                 ofstream out;
560                 m->openOutputFile(outputFileName, out);
561                 
562                 
563                 ifstream in;
564                 m->openInputFile(qualfile, in);
565                 string name;
566                 
567                 bool wroteSomething = false;
568                 int selectedCount = 0;
569                 
570         if (m->debug) { set<string> temp; sanity["qual"] = temp; }
571                 
572                 while(!in.eof()){       
573                         string saveName = "";
574                         string name = "";
575                         string scores = "";
576                         
577                         in >> name;
578             
579             if (!dups) {//adjust name if needed
580                 map<string, string>::iterator it = uniqueMap.find(name);
581                 if (it != uniqueMap.end()) { name = it->second; }
582             }
583                                 
584                         if (name.length() != 0) { 
585                                 saveName = name.substr(1);
586                                 while (!in.eof())       {       
587                                         char c = in.get(); 
588                                         if (c == 10 || c == 13 || c == -1){     break;  }
589                                         else { name += c; }     
590                                 } 
591                                 m->gobble(in);
592                         }
593                         
594                         while(in){
595                                 char letter= in.get();
596                                 if(letter == '>'){      in.putback(letter);     break;  }
597                                 else{ scores += letter; }
598                         }
599                         
600                         m->gobble(in);
601                         
602                         if (names.count(saveName) != 0) {
603                                 wroteSomething = true;
604                                                 
605                                 out << name << endl << scores;
606                                 selectedCount++;
607                 if (m->debug) { sanity["qual"].insert(name); }
608                         }
609                         
610                         m->gobble(in);
611                 }
612                 in.close();
613                 out.close();
614                 
615                 
616                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
617                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
618                 
619                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your quality file."); m->mothurOutEndLine();
620
621                 
622                 return 0;
623                 
624         }
625         catch(exception& e) {
626                 m->errorOut(e, "GetSeqsCommand", "readQual");
627                 exit(1);
628         }
629 }
630 //**********************************************************************************************************************
631 int GetSeqsCommand::readCount(){
632         try {
633                 string thisOutputDir = outputDir;
634                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
635                 map<string, string> variables; 
636                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
637         variables["[extension]"] = m->getExtension(countfile);
638                 string outputFileName = getOutputFileName("count", variables);
639                 
640                 ofstream out;
641                 m->openOutputFile(outputFileName, out);
642                 
643                 ifstream in;
644                 m->openInputFile(countfile, in);
645                 
646                 bool wroteSomething = false;
647                 int selectedCount = 0;
648                 
649         string headers = m->getline(in); m->gobble(in);
650         out << headers << endl;
651         
652         string name, rest; int thisTotal;
653         while (!in.eof()) {
654             
655             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
656             
657             in >> name; m->gobble(in); 
658             in >> thisTotal; m->gobble(in);
659             rest = m->getline(in); m->gobble(in);
660             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
661             
662             if (names.count(name) != 0) {
663                 out << name << '\t' << thisTotal << '\t' << rest << endl;
664                 wroteSomething = true;
665                 selectedCount+= thisTotal;
666             }
667         }
668         in.close();
669                 out.close();
670         
671         //check for groups that have been eliminated
672         CountTable ct;
673         if (ct.testGroups(outputFileName)) {
674             ct.readTable(outputFileName, true, false);
675             ct.printTable(outputFileName);
676         }
677                 
678                 if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
679                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
680                 
681                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
682         
683                 return 0;
684         }
685         catch(exception& e) {
686                 m->errorOut(e, "GetSeqsCommand", "readCount");
687                 exit(1);
688         }
689 }
690
691 //**********************************************************************************************************************
692 int GetSeqsCommand::readList(){
693         try {
694                 string thisOutputDir = outputDir;
695                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
696         map<string, string> variables; 
697                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
698         variables["[extension]"] = m->getExtension(listfile);
699                 
700                 ifstream in;
701                 m->openInputFile(listfile, in);
702                 
703                 bool wroteSomething = false;
704                 int selectedCount = 0;
705         
706         if (m->debug) { set<string> temp; sanity["list"] = temp; }
707                 
708                 while(!in.eof()){
709                         
710                         selectedCount = 0;
711
712                         //read in list vector
713                         ListVector list(in);
714                         
715                         //make a new list vector
716                         ListVector newList;
717                         newList.setLabel(list.getLabel());
718             
719             variables["[distance]"] = list.getLabel();
720             string outputFileName = getOutputFileName("list", variables);
721                         
722                         ofstream out;
723                         m->openOutputFile(outputFileName, out);
724                         outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
725             
726             vector<string> binLabels = list.getLabels();
727             vector<string> newBinLabels;
728             
729             if (m->control_pressed) { in.close(); out.close();  return 0; }
730                         
731                         //for each bin
732                         for (int i = 0; i < list.getNumBins(); i++) {
733                         
734                                 //parse out names that are in accnos file
735                                 string binnames = list.get(i);
736                 vector<string> bnames;
737                 m->splitAtComma(binnames, bnames);
738                                 
739                                 string newNames = "";
740                 for (int j = 0; j < bnames.size(); j++) {
741                                         string name = bnames[j];
742                                         //if that name is in the .accnos file, add it
743                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
744                                 }
745                         
746                                 //if there are names in this bin add to new list
747                                 if (newNames != "") { 
748                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
749                                         newList.push_back(newNames);
750                     newBinLabels.push_back(binLabels[i]);
751                                 }
752                         }
753                                 
754                         //print new listvector
755                         if (newList.getNumBins() != 0) {
756                                 wroteSomething = true;
757                                 newList.setLabels(newBinLabels);
758                 newList.printHeaders(out);
759                                 newList.print(out);
760                         }
761                         
762                         m->gobble(in);
763             out.close();
764                 }
765                 in.close();     
766                 
767                 
768                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
769                 
770                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
771                 
772                 return 0;
773
774         }
775         catch(exception& e) {
776                 m->errorOut(e, "GetSeqsCommand", "readList");
777                 exit(1);
778         }
779 }
780 //**********************************************************************************************************************
781 int GetSeqsCommand::readName(){
782         try {
783                 string thisOutputDir = outputDir;
784                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
785         map<string, string> variables; 
786                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
787         variables["[extension]"] = m->getExtension(namefile);
788                 string outputFileName = getOutputFileName("name", variables);
789                 ofstream out;
790                 m->openOutputFile(outputFileName, out);
791                 
792
793                 ifstream in;
794                 m->openInputFile(namefile, in);
795                 string name, firstCol, secondCol;
796                 
797                 bool wroteSomething = false;
798                 int selectedCount = 0;
799         
800         if (m->debug) { set<string> temp; sanity["name"] = temp; }
801         if (m->debug) { set<string> temp; sanity["dupname"] = temp; }
802                 
803                 while(!in.eof()){
804                 
805                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
806
807                         in >> firstCol;                 m->gobble(in);
808                         in >> secondCol;
809                         
810                         string hold = "";
811                         if (dups) { hold = secondCol; }
812                         
813                         vector<string> parsedNames;
814                         m->splitAtComma(secondCol, parsedNames);
815                         
816                         vector<string> validSecond;
817                         for (int i = 0; i < parsedNames.size(); i++) {
818                                 if (names.count(parsedNames[i]) != 0) {
819                                         validSecond.push_back(parsedNames[i]);
820                     if (m->debug) { sanity["dupname"].insert(parsedNames[i]); }
821                                 }
822                         }
823
824                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
825                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]); if (m->debug) { sanity["dupname"].insert(parsedNames[i]); } }
826                                 out << firstCol << '\t' << hold << endl;
827                                 wroteSomething = true;
828                                 selectedCount += parsedNames.size();
829                 if (m->debug) { sanity["name"].insert(firstCol); }
830                         }else {
831                 
832                                 selectedCount += validSecond.size();
833                                 
834                                 //if the name in the first column is in the set then print it and any other names in second column also in set
835                                 if (names.count(firstCol) != 0) {
836                                 
837                                         wroteSomething = true;
838                                         
839                                         out << firstCol << '\t';
840                                         
841                                         //you know you have at least one valid second since first column is valid
842                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
843                                         out << validSecond[validSecond.size()-1] << endl;
844                     
845                     if (m->debug) { sanity["name"].insert(firstCol); }
846                                         
847                                 
848                                 //make first name in set you come to first column and then add the remaining names to second column
849                                 }else {
850                     
851                                         //you want part of this row
852                                         if (validSecond.size() != 0) {
853                                         
854                                                 wroteSomething = true;
855                                                 
856                                                 out << validSecond[0] << '\t';
857                         //we are changing the unique name in the fasta file
858                         uniqueMap[firstCol] = validSecond[0];
859                                         
860                                                 //you know you have at least one valid second since first column is valid
861                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
862                                                 out << validSecond[validSecond.size()-1] << endl;
863                         
864                         if (m->debug) { sanity["name"].insert(validSecond[0]); }
865                                         }
866                                 }
867                         }
868                         m->gobble(in);
869                 }
870                 in.close();
871                 out.close();
872                 
873                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
874                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
875                 
876                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
877                 
878                 return 0;
879                 
880         }
881         catch(exception& e) {
882                 m->errorOut(e, "GetSeqsCommand", "readName");
883                 exit(1);
884         }
885 }
886
887 //**********************************************************************************************************************
888 int GetSeqsCommand::readGroup(){
889         try {
890                 string thisOutputDir = outputDir;
891                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
892                 map<string, string> variables; 
893                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
894         variables["[extension]"] = m->getExtension(groupfile);
895                 string outputFileName = getOutputFileName("group", variables);
896                 ofstream out;
897                 m->openOutputFile(outputFileName, out);
898                 
899
900                 ifstream in;
901                 m->openInputFile(groupfile, in);
902                 string name, group;
903                 
904                 bool wroteSomething = false;
905                 int selectedCount = 0;
906         
907         if (m->debug) { set<string> temp; sanity["group"] = temp; }
908                 
909                 while(!in.eof()){
910
911                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
912
913
914                         in >> name;                             //read from first column
915                         in >> group;                    //read from second column
916             
917                         
918                         //if this name is in the accnos file
919                         if (names.count(name) != 0) {
920                                 wroteSomething = true;
921                                 
922                                 out << name << '\t' << group << endl;
923                                 selectedCount++;
924                 
925                 if (m->debug) {  sanity["group"].insert(name); }
926                         }
927                                         
928                         m->gobble(in);
929                 }
930                 in.close();
931                 out.close();
932                 
933                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
934                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
935                 
936                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
937
938                 
939                 return 0;
940
941         }
942         catch(exception& e) {
943                 m->errorOut(e, "GetSeqsCommand", "readGroup");
944                 exit(1);
945         }
946 }
947 //**********************************************************************************************************************
948 int GetSeqsCommand::readTax(){
949         try {
950                 string thisOutputDir = outputDir;
951                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
952                 map<string, string> variables; 
953                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
954         variables["[extension]"] = m->getExtension(taxfile);
955                 string outputFileName = getOutputFileName("taxonomy", variables);
956                 ofstream out;
957                 m->openOutputFile(outputFileName, out);
958                 
959                 ifstream in;
960                 m->openInputFile(taxfile, in);
961                 string name, tax;
962                 
963                 bool wroteSomething = false;
964                 int selectedCount = 0;
965         
966         if (m->debug) { set<string> temp; sanity["tax"] = temp; }
967                 
968                 while(!in.eof()){
969
970                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
971
972                         in >> name;                             //read from first column
973                         in >> tax;                      //read from second column
974             
975             if (!dups) {//adjust name if needed
976                 map<string, string>::iterator it = uniqueMap.find(name);
977                 if (it != uniqueMap.end()) { name = it->second; }
978             }
979                         
980                         //if this name is in the accnos file
981                         if (names.count(name) != 0) {
982                                 wroteSomething = true;
983                                 
984                                 out << name << '\t' << tax << endl;
985                                 selectedCount++;
986                 
987                 if (m->debug) { sanity["tax"].insert(name); }
988                         }
989                                         
990                         m->gobble(in);
991                 }
992                 in.close();
993                 out.close();
994                 
995                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
996                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
997                 
998                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
999                         
1000                 return 0;
1001
1002         }
1003         catch(exception& e) {
1004                 m->errorOut(e, "GetSeqsCommand", "readTax");
1005                 exit(1);
1006         }
1007 }
1008 //**********************************************************************************************************************
1009 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
1010 int GetSeqsCommand::readAlign(){
1011         try {
1012                 string thisOutputDir = outputDir;
1013                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
1014         map<string, string> variables; 
1015                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
1016                 string outputFileName = getOutputFileName("alignreport", variables);
1017                 ofstream out;
1018                 m->openOutputFile(outputFileName, out);
1019                 
1020
1021                 ifstream in;
1022                 m->openInputFile(alignfile, in);
1023                 string name, junk;
1024                 
1025                 bool wroteSomething = false;
1026                 int selectedCount = 0;
1027                 
1028                 //read column headers
1029                 for (int i = 0; i < 16; i++) {  
1030                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1031                         else                    {       break;                  }
1032                 }
1033                 out << endl;
1034                 
1035                 while(!in.eof()){
1036                 
1037                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1038
1039
1040                         in >> name;                             //read from first column
1041             
1042             if (!dups) {//adjust name if needed
1043                 map<string, string>::iterator it = uniqueMap.find(name);
1044                 if (it != uniqueMap.end()) { name = it->second; }
1045             }
1046                         
1047                         //if this name is in the accnos file
1048                         if (names.count(name) != 0) {
1049                                 wroteSomething = true;
1050                                 selectedCount++;
1051                                 
1052                                 out << name << '\t';
1053                                 
1054                                 //read rest
1055                                 for (int i = 0; i < 15; i++) {  
1056                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1057                                         else                    {       break;                  }
1058                                 }
1059                                 out << endl;
1060                                 
1061                         }else {//still read just don't do anything with it
1062                                 //read rest
1063                                 for (int i = 0; i < 15; i++) {  
1064                                         if (!in.eof())  {       in >> junk;             }
1065                                         else                    {       break;                  }
1066                                 }
1067                         }
1068                         
1069                         m->gobble(in);
1070                 }
1071                 in.close();
1072                 out.close();
1073                 
1074                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
1075                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
1076                 
1077                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
1078                 
1079                 return 0;
1080                 
1081         }
1082         catch(exception& e) {
1083                 m->errorOut(e, "GetSeqsCommand", "readAlign");
1084                 exit(1);
1085         }
1086 }
1087 //**********************************************************************************************************************
1088 //just looking at common mistakes. 
1089 int GetSeqsCommand::runSanityCheck(){
1090         try {
1091         string thisOutputDir = outputDir;
1092         if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
1093         string filename = outputDir + "get.seqs.debug.report";
1094         
1095         ofstream out;
1096                 m->openOutputFile(filename, out); 
1097
1098        
1099         //compare fasta, name, qual and taxonomy if given to make sure they contain the same seqs
1100         if (fastafile != "") {
1101             if (namefile != "") { //compare with fasta
1102                 if (sanity["fasta"] != sanity["name"]) { //create mismatch file
1103                     createMisMatchFile(out, fastafile, namefile, sanity["fasta"], sanity["name"]);
1104                 }
1105             }
1106             if (qualfile != "") {
1107                 if (sanity["fasta"] != sanity["qual"]) { //create mismatch file
1108                     createMisMatchFile(out, fastafile, qualfile, sanity["fasta"], sanity["qual"]);
1109                 }
1110             }
1111             if (taxfile != "") {
1112                 if (sanity["fasta"] != sanity["tax"]) { //create mismatch file
1113                     createMisMatchFile(out, fastafile, taxfile, sanity["fasta"], sanity["tax"]);
1114                 }
1115             }
1116         }
1117         
1118         //compare dupnames, groups and list if given to make sure they match
1119         if (namefile != "") {
1120             if (groupfile != "") {
1121                 if (sanity["dupname"] != sanity["group"]) { //create mismatch file
1122                     createMisMatchFile(out, namefile, groupfile, sanity["dupname"], sanity["group"]);
1123                 } 
1124             }
1125             if (listfile != "") {
1126                 if (sanity["dupname"] != sanity["list"]) { //create mismatch file
1127                     createMisMatchFile(out, namefile, listfile, sanity["dupname"], sanity["list"]);
1128                 } 
1129             }
1130         }else{
1131
1132             if ((groupfile != "") && (fastafile != "")) {
1133                 if (sanity["fasta"] != sanity["group"]) { //create mismatch file
1134                     createMisMatchFile(out, fastafile, groupfile, sanity["fasta"], sanity["group"]);
1135                 } 
1136             }
1137         }
1138         
1139         out.close();
1140         
1141         if (m->isBlank(filename)) { m->mothurRemove(filename); }
1142         else { m->mothurOut("\n[DEBUG]: " + filename + " contains the file mismatches.\n");outputNames.push_back(filename); outputTypes["debug"].push_back(filename); }
1143         
1144         return 0;
1145     }
1146         catch(exception& e) {
1147                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1148                 exit(1);
1149         }
1150 }
1151 //**********************************************************************************************************************
1152 //just looking at common mistakes. 
1153 int GetSeqsCommand::createMisMatchFile(ofstream& out, string filename1, string filename2, set<string> set1, set<string> set2){
1154         try {
1155         out << "****************************************" << endl << endl;
1156         out << "Names unique to " << filename1 << ":\n";
1157         
1158         //remove names in set1 that are also in set2
1159         for (set<string>::iterator it = set1.begin(); it != set1.end();) {
1160             string name = *it;
1161             
1162             if (set2.count(name) == 0)  { out << name << endl;  } //name unique to set1
1163             else                        { set2.erase(name);     } //you are in both so erase 
1164             set1.erase(it++);
1165         }
1166         
1167         out << "\nNames unique to " << filename2 << ":\n";
1168         //output results
1169         for (set<string>::iterator it = set2.begin(); it != set2.end(); it++) {  out << *it << endl;  }
1170         
1171         out << "****************************************" << endl << endl;
1172         
1173         return 0;
1174     }
1175         catch(exception& e) {
1176                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1177                 exit(1);
1178         }
1179 }
1180 //**********************************************************************************************************************
1181
1182 int GetSeqsCommand::compareAccnos(){
1183         try {
1184                 
1185                 string thisOutputDir = outputDir;
1186                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
1187         map<string, string> variables; 
1188                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile));
1189                 string outputFileName = getOutputFileName("accnosreport", variables);
1190                 
1191                 ofstream out;
1192                 m->openOutputFile(outputFileName, out);
1193                 
1194                 ifstream in;
1195                 m->openInputFile(accnosfile2, in);
1196                 string name;
1197                 
1198                 set<string> namesAccnos2;
1199                 set<string> namesDups;
1200                 set<string> namesAccnos = names;
1201                 
1202                 map<string, int> nameCount;
1203                 
1204                 if (namefile != "") {
1205                         ifstream inName;
1206                         m->openInputFile(namefile, inName);
1207                         
1208                         
1209                         while(!inName.eof()){
1210                                 
1211                                 if (m->control_pressed) { inName.close(); return 0; }
1212                                 
1213                                 string thisname, repnames;
1214                                 
1215                                 inName >> thisname;             m->gobble(inName);              //read from first column
1216                                 inName >> repnames;                     //read from second column
1217                                 
1218                                 int num = m->getNumNames(repnames);
1219                                 nameCount[thisname] = num;
1220                                 
1221                                 m->gobble(inName);
1222                         }
1223                         inName.close(); 
1224                 }
1225                 
1226                 while(!in.eof()){
1227                         in >> name;
1228                         
1229                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
1230                                 int pos = name.find_last_of('_');
1231                                 string tempName = name;
1232                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
1233                                 if (namesAccnos.count(tempName) == 0){
1234                                         namesAccnos2.insert(name);
1235                                 }else { //you are in both so erase
1236                                         namesAccnos.erase(name);
1237                                         namesDups.insert(name);
1238                                 }
1239                         }else { //you are in both so erase
1240                                 namesAccnos.erase(name);
1241                                 namesDups.insert(name);
1242                         }
1243                         
1244                         m->gobble(in);
1245                 }
1246                 in.close();     
1247                 
1248                 out << "Names in both files : " + toString(namesDups.size()) << endl;
1249                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
1250                 
1251                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
1252                         out << (*it);
1253                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1254                         out << endl;
1255                 }
1256                 
1257                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
1258                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
1259                 
1260                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
1261                         out << (*it);
1262                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1263                         out << endl;
1264                 }
1265                 
1266                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
1267                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
1268                 
1269                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
1270                         out << (*it);
1271                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1272                         out << endl;
1273                 }
1274
1275                 out.close(); 
1276                 
1277                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
1278                 
1279                 return 0;
1280                 
1281         }
1282         catch(exception& e) {
1283                 m->errorOut(e, "GetSeqsCommand", "compareAccnos");
1284                 exit(1);
1285         }
1286 }
1287
1288
1289 //**********************************************************************************************************************
1290