]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetSeqsCommand::setParameters(){ 
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
24                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos2);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "GetSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string GetSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
44                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
45                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
46                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
48                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
49                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57
58 //**********************************************************************************************************************
59 GetSeqsCommand::GetSeqsCommand(){       
60         try {
61                 abort = true; calledHelp = true;
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["fasta"] = tempOutNames;
65                 outputTypes["taxonomy"] = tempOutNames;
66                 outputTypes["name"] = tempOutNames;
67                 outputTypes["group"] = tempOutNames;
68                 outputTypes["alignreport"] = tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["qfile"] = tempOutNames;
71                 outputTypes["accnosreport"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 GetSeqsCommand::GetSeqsCommand(string option)  {
80         try {
81                 abort = false; calledHelp = false;   
82                                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string,string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                         
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["fasta"] = tempOutNames;
104                         outputTypes["taxonomy"] = tempOutNames;
105                         outputTypes["name"] = tempOutNames;
106                         outputTypes["group"] = tempOutNames;
107                         outputTypes["alignreport"] = tempOutNames;
108                         outputTypes["list"] = tempOutNames;
109                         outputTypes["qfile"] = tempOutNames;
110                         outputTypes["accnosreport"] = tempOutNames;
111                         
112                         //if the user changes the output directory command factory will send this info to us in the output parameter 
113                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("alignreport");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
126                                 }
127                                 
128                                 it = parameters.find("fasta");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("accnos");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
142                                 }
143                                 
144                                 it = parameters.find("accnos2");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
150                                 }
151                                 
152                                 it = parameters.find("list");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167                                 
168                                 it = parameters.find("group");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
174                                 }
175                                 
176                                 it = parameters.find("taxonomy");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
182                                 }
183                                 
184                                 it = parameters.find("qfile");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
190                                 }
191                         }
192
193                         
194                         //check for required parameters
195                         accnosfile = validParameter.validFile(parameters, "accnos", true);
196                         if (accnosfile == "not open") { abort = true; }
197                         else if (accnosfile == "not found") {  
198                                 accnosfile = m->getAccnosFile(); 
199                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
200                                 else { 
201                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
202                                         abort = true;
203                                 } 
204                         }else { m->setAccnosFile(accnosfile); } 
205                         
206                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
207                         
208                         fastafile = validParameter.validFile(parameters, "fasta", true);
209                         if (fastafile == "not open") { fastafile = ""; abort = true; }
210                         else if (fastafile == "not found") {  fastafile = "";  }
211                         else { m->setFastaFile(fastafile); }
212                         
213                         namefile = validParameter.validFile(parameters, "name", true);
214                         if (namefile == "not open") { namefile = ""; abort = true; }
215                         else if (namefile == "not found") {  namefile = "";  }  
216                         else { m->setNameFile(namefile); }
217                         
218                         groupfile = validParameter.validFile(parameters, "group", true);
219                         if (groupfile == "not open") { abort = true; }
220                         else if (groupfile == "not found") {  groupfile = "";  }        
221                         else { m->setGroupFile(groupfile); }
222                         
223                         alignfile = validParameter.validFile(parameters, "alignreport", true);
224                         if (alignfile == "not open") { abort = true; }
225                         else if (alignfile == "not found") {  alignfile = "";  }
226                         
227                         listfile = validParameter.validFile(parameters, "list", true);
228                         if (listfile == "not open") { abort = true; }
229                         else if (listfile == "not found") {  listfile = "";  }
230                         else { m->setListFile(listfile); }
231                         
232                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
233                         if (taxfile == "not open") { taxfile = ""; abort = true; }
234                         else if (taxfile == "not found") {  taxfile = "";  }
235                         else { m->setTaxonomyFile(taxfile); }
236                         
237                         qualfile = validParameter.validFile(parameters, "qfile", true);
238                         if (qualfile == "not open") { abort = true; }
239                         else if (qualfile == "not found") {  qualfile = "";  }
240                         else { m->setQualFile(qualfile); }
241                         
242                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
243                         if (accnosfile2 == "not open") { abort = true; }
244                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
245                         
246                         
247                         string usedDups = "true";
248                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
249                         dups = m->isTrue(temp);
250                         
251                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
252                 
253                         if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
254                                 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
255                                 parser.getNameFile(files);
256                         }
257                 }
258
259         }
260         catch(exception& e) {
261                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
262                 exit(1);
263         }
264 }
265 //**********************************************************************************************************************
266
267 int GetSeqsCommand::execute(){
268         try {
269                 
270                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
271                 
272                 //get names you want to keep
273                 readAccnos();
274                 
275                 if (m->control_pressed) { return 0; }
276                 
277                 //read through the correct file and output lines you want to keep
278                 if (namefile != "")                     {               readName();                     }
279                 if (fastafile != "")            {               readFasta();            }
280                 if (groupfile != "")            {               readGroup();            }
281                 if (alignfile != "")            {               readAlign();            }
282                 if (listfile != "")                     {               readList();                     }
283                 if (taxfile != "")                      {               readTax();                      }
284                 if (qualfile != "")                     {               readQual();                     }
285                 if (accnosfile2 != "")          {               compareAccnos();        }
286         
287         if (m->debug) { runSanityCheck(); }
288                 
289                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
290                 
291                 
292                 if (outputNames.size() != 0) {
293                         m->mothurOutEndLine();
294                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
295                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
296                         m->mothurOutEndLine();
297                         
298                         //set fasta file as new current fastafile
299                         string current = "";
300                         itTypes = outputTypes.find("fasta");
301                         if (itTypes != outputTypes.end()) {
302                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
303                         }
304                         
305                         itTypes = outputTypes.find("name");
306                         if (itTypes != outputTypes.end()) {
307                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
308                         }
309                         
310                         itTypes = outputTypes.find("group");
311                         if (itTypes != outputTypes.end()) {
312                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
313                         }
314                         
315                         itTypes = outputTypes.find("list");
316                         if (itTypes != outputTypes.end()) {
317                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
318                         }
319                         
320                         itTypes = outputTypes.find("taxonomy");
321                         if (itTypes != outputTypes.end()) {
322                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
323                         }
324                         
325                         itTypes = outputTypes.find("qfile");
326                         if (itTypes != outputTypes.end()) {
327                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
328                         }
329                         
330                 }
331                 
332                 return 0;               
333         }
334
335         catch(exception& e) {
336                 m->errorOut(e, "GetSeqsCommand", "execute");
337                 exit(1);
338         }
339 }
340
341 //**********************************************************************************************************************
342 int GetSeqsCommand::readFasta(){
343         try {
344                 string thisOutputDir = outputDir;
345                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
346                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" +  m->getExtension(fastafile);
347                 ofstream out;
348                 m->openOutputFile(outputFileName, out);
349                 
350                 
351                 ifstream in;
352                 m->openInputFile(fastafile, in);
353                 string name;
354                 
355                 bool wroteSomething = false;
356                 int selectedCount = 0;
357         
358         if (m->debug) { set<string> temp; sanity["fasta"] = temp; }
359                 
360                 while(!in.eof()){
361                 
362                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
363                         
364                         Sequence currSeq(in);
365                         name = currSeq.getName();
366                         
367                         if (name != "") {
368                                 //if this name is in the accnos file
369                                 if (names.count(name) != 0) {
370                                         wroteSomething = true;
371                                         
372                                         currSeq.printSequence(out);
373                                         selectedCount++;
374                     
375                     if (m->debug) { sanity["fasta"].insert(name); }
376                                 }
377                         }
378                         m->gobble(in);
379                 }
380                 in.close();     
381                 out.close();
382                 
383                 
384                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
385                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
386                 
387                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
388                 
389                 return 0;
390
391         }
392         catch(exception& e) {
393                 m->errorOut(e, "GetSeqsCommand", "readFasta");
394                 exit(1);
395         }
396 }
397 //**********************************************************************************************************************
398 int GetSeqsCommand::readQual(){
399         try {
400                 string thisOutputDir = outputDir;
401                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
402                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
403                 ofstream out;
404                 m->openOutputFile(outputFileName, out);
405                 
406                 
407                 ifstream in;
408                 m->openInputFile(qualfile, in);
409                 string name;
410                 
411                 bool wroteSomething = false;
412                 int selectedCount = 0;
413                 
414         if (m->debug) { set<string> temp; sanity["qual"] = temp; }
415                 
416                 while(!in.eof()){       
417                         string saveName = "";
418                         string name = "";
419                         string scores = "";
420                         
421                         in >> name; 
422                                 
423                         if (name.length() != 0) { 
424                                 saveName = name.substr(1);
425                                 while (!in.eof())       {       
426                                         char c = in.get(); 
427                                         if (c == 10 || c == 13){        break;  }
428                                         else { name += c; }     
429                                 } 
430                                 m->gobble(in);
431                         }
432                         
433                         while(in){
434                                 char letter= in.get();
435                                 if(letter == '>'){      in.putback(letter);     break;  }
436                                 else{ scores += letter; }
437                         }
438                         
439                         m->gobble(in);
440                         
441                         if (names.count(saveName) != 0) {
442                                 wroteSomething = true;
443                                                 
444                                 out << name << endl << scores;
445                                 selectedCount++;
446                 if (m->debug) { sanity["qual"].insert(name); }
447                         }
448                         
449                         m->gobble(in);
450                 }
451                 in.close();
452                 out.close();
453                 
454                 
455                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
456                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
457                 
458                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your quality file."); m->mothurOutEndLine();
459
460                 
461                 return 0;
462                 
463         }
464         catch(exception& e) {
465                 m->errorOut(e, "GetSeqsCommand", "readQual");
466                 exit(1);
467         }
468 }
469 //**********************************************************************************************************************
470 int GetSeqsCommand::readList(){
471         try {
472                 string thisOutputDir = outputDir;
473                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
474                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
475                 ofstream out;
476                 m->openOutputFile(outputFileName, out);
477                 
478                 ifstream in;
479                 m->openInputFile(listfile, in);
480                 
481                 bool wroteSomething = false;
482                 int selectedCount = 0;
483         
484         if (m->debug) { set<string> temp; sanity["list"] = temp; }
485                 
486                 while(!in.eof()){
487                         
488                         selectedCount = 0;
489                         
490                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
491
492                         //read in list vector
493                         ListVector list(in);
494                         
495                         //make a new list vector
496                         ListVector newList;
497                         newList.setLabel(list.getLabel());
498                         
499                         //for each bin
500                         for (int i = 0; i < list.getNumBins(); i++) {
501                         
502                                 //parse out names that are in accnos file
503                                 string binnames = list.get(i);
504                                 
505                                 string newNames = "";
506                                 while (binnames.find_first_of(',') != -1) { 
507                                         string name = binnames.substr(0,binnames.find_first_of(','));
508                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
509                                         
510                                         //if that name is in the .accnos file, add it
511                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
512                                 }
513                         
514                                 //get last name
515                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++;  if (m->debug) { sanity["list"].insert(binnames); } }
516
517                                 //if there are names in this bin add to new list
518                                 if (newNames != "") { 
519                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
520                                         newList.push_back(newNames);    
521                                 }
522                         }
523                                 
524                         //print new listvector
525                         if (newList.getNumBins() != 0) {
526                                 wroteSomething = true;
527                                 newList.print(out);
528                         }
529                         
530                         m->gobble(in);
531                 }
532                 in.close();     
533                 out.close();
534                 
535                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
536                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
537                 
538                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
539                 
540                 return 0;
541
542         }
543         catch(exception& e) {
544                 m->errorOut(e, "GetSeqsCommand", "readList");
545                 exit(1);
546         }
547 }
548 //**********************************************************************************************************************
549 int GetSeqsCommand::readName(){
550         try {
551                 string thisOutputDir = outputDir;
552                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
553                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" +  m->getExtension(namefile);
554                 ofstream out;
555                 m->openOutputFile(outputFileName, out);
556                 
557
558                 ifstream in;
559                 m->openInputFile(namefile, in);
560                 string name, firstCol, secondCol;
561                 
562                 bool wroteSomething = false;
563                 int selectedCount = 0;
564         
565         if (m->debug) { set<string> temp; sanity["name"] = temp; }
566         if (m->debug) { set<string> temp; sanity["dupname"] = temp; }
567                 
568                 while(!in.eof()){
569                 
570                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
571
572                         in >> firstCol;                         
573                         in >> secondCol;
574                         
575                         string hold = "";
576                         if (dups) { hold = secondCol; }
577                         
578                         vector<string> parsedNames;
579                         m->splitAtComma(secondCol, parsedNames);
580                         
581                         vector<string> validSecond;
582                         for (int i = 0; i < parsedNames.size(); i++) {
583                                 if (names.count(parsedNames[i]) != 0) {
584                                         validSecond.push_back(parsedNames[i]);
585                     if (m->debug) { sanity["dupname"].insert(parsedNames[i]); }
586                                 }
587                         }
588
589                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
590                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]); if (m->debug) { sanity["dupname"].insert(parsedNames[i]); } }
591                                 out << firstCol << '\t' << hold << endl;
592                                 wroteSomething = true;
593                                 selectedCount += parsedNames.size();
594                 if (m->debug) { sanity["name"].insert(firstCol); }
595                         }else {
596                                 selectedCount += validSecond.size();
597                                 
598                                 //if the name in the first column is in the set then print it and any other names in second column also in set
599                                 if (names.count(firstCol) != 0) {
600                                 
601                                         wroteSomething = true;
602                                         
603                                         out << firstCol << '\t';
604                                         
605                                         //you know you have at least one valid second since first column is valid
606                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
607                                         out << validSecond[validSecond.size()-1] << endl;
608                     
609                     if (m->debug) { sanity["name"].insert(firstCol); }
610                                         
611                                 
612                                 //make first name in set you come to first column and then add the remaining names to second column
613                                 }else {
614                                         //you want part of this row
615                                         if (validSecond.size() != 0) {
616                                         
617                                                 wroteSomething = true;
618                                                 
619                                                 out << validSecond[0] << '\t';
620                                         
621                                                 //you know you have at least one valid second since first column is valid
622                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
623                                                 out << validSecond[validSecond.size()-1] << endl;
624                         
625                         if (m->debug) { sanity["name"].insert(validSecond[0]); }
626                                         }
627                                 }
628                         }
629                         m->gobble(in);
630                 }
631                 in.close();
632                 out.close();
633                 
634                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
635                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
636                 
637                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
638                 
639                 return 0;
640                 
641         }
642         catch(exception& e) {
643                 m->errorOut(e, "GetSeqsCommand", "readName");
644                 exit(1);
645         }
646 }
647
648 //**********************************************************************************************************************
649 int GetSeqsCommand::readGroup(){
650         try {
651                 string thisOutputDir = outputDir;
652                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
653                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
654                 ofstream out;
655                 m->openOutputFile(outputFileName, out);
656                 
657
658                 ifstream in;
659                 m->openInputFile(groupfile, in);
660                 string name, group;
661                 
662                 bool wroteSomething = false;
663                 int selectedCount = 0;
664         
665         if (m->debug) { set<string> temp; sanity["group"] = temp; }
666                 
667                 while(!in.eof()){
668
669                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
670
671
672                         in >> name;                             //read from first column
673                         in >> group;                    //read from second column
674                         
675                         //if this name is in the accnos file
676                         if (names.count(name) != 0) {
677                                 wroteSomething = true;
678                                 
679                                 out << name << '\t' << group << endl;
680                                 selectedCount++;
681                 
682                 if (m->debug) {  sanity["group"].insert(name); }
683                         }
684                                         
685                         m->gobble(in);
686                 }
687                 in.close();
688                 out.close();
689                 
690                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
691                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
692                 
693                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
694
695                 
696                 return 0;
697
698         }
699         catch(exception& e) {
700                 m->errorOut(e, "GetSeqsCommand", "readGroup");
701                 exit(1);
702         }
703 }
704 //**********************************************************************************************************************
705 int GetSeqsCommand::readTax(){
706         try {
707                 string thisOutputDir = outputDir;
708                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
709                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
710                 ofstream out;
711                 m->openOutputFile(outputFileName, out);
712                 
713                 ifstream in;
714                 m->openInputFile(taxfile, in);
715                 string name, tax;
716                 
717                 bool wroteSomething = false;
718                 int selectedCount = 0;
719         
720         if (m->debug) { set<string> temp; sanity["tax"] = temp; }
721                 
722                 while(!in.eof()){
723
724                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
725
726                         in >> name;                             //read from first column
727                         in >> tax;                      //read from second column
728                         
729                         //if this name is in the accnos file
730                         if (names.count(name) != 0) {
731                                 wroteSomething = true;
732                                 
733                                 out << name << '\t' << tax << endl;
734                                 selectedCount++;
735                 
736                 if (m->debug) { sanity["tax"].insert(name); }
737                         }
738                                         
739                         m->gobble(in);
740                 }
741                 in.close();
742                 out.close();
743                 
744                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
745                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
746                 
747                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
748                         
749                 return 0;
750
751         }
752         catch(exception& e) {
753                 m->errorOut(e, "GetSeqsCommand", "readTax");
754                 exit(1);
755         }
756 }
757 //**********************************************************************************************************************
758 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
759 int GetSeqsCommand::readAlign(){
760         try {
761                 string thisOutputDir = outputDir;
762                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
763                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
764                 ofstream out;
765                 m->openOutputFile(outputFileName, out);
766                 
767
768                 ifstream in;
769                 m->openInputFile(alignfile, in);
770                 string name, junk;
771                 
772                 bool wroteSomething = false;
773                 int selectedCount = 0;
774                 
775                 //read column headers
776                 for (int i = 0; i < 16; i++) {  
777                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
778                         else                    {       break;                  }
779                 }
780                 out << endl;
781                 
782                 while(!in.eof()){
783                 
784                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
785
786
787                         in >> name;                             //read from first column
788                         
789                         //if this name is in the accnos file
790                         if (names.count(name) != 0) {
791                                 wroteSomething = true;
792                                 selectedCount++;
793                                 
794                                 out << name << '\t';
795                                 
796                                 //read rest
797                                 for (int i = 0; i < 15; i++) {  
798                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
799                                         else                    {       break;                  }
800                                 }
801                                 out << endl;
802                                 
803                         }else {//still read just don't do anything with it
804                                 //read rest
805                                 for (int i = 0; i < 15; i++) {  
806                                         if (!in.eof())  {       in >> junk;             }
807                                         else                    {       break;                  }
808                                 }
809                         }
810                         
811                         m->gobble(in);
812                 }
813                 in.close();
814                 out.close();
815                 
816                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
817                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
818                 
819                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
820                 
821                 return 0;
822                 
823         }
824         catch(exception& e) {
825                 m->errorOut(e, "GetSeqsCommand", "readAlign");
826                 exit(1);
827         }
828 }
829 //**********************************************************************************************************************
830
831 int GetSeqsCommand::readAccnos(){
832         try {
833                 
834                 ifstream in;
835                 m->openInputFile(accnosfile, in);
836                 string name;
837                 
838                 while(!in.eof()){
839                         in >> name;
840                                                 
841                         names.insert(name);
842                         
843                         m->gobble(in);
844                 }
845                 in.close();     
846                 
847                 return 0;
848
849         }
850         catch(exception& e) {
851                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
852                 exit(1);
853         }
854 }
855 //**********************************************************************************************************************
856 //just looking at common mistakes. 
857 int GetSeqsCommand::runSanityCheck(){
858         try {
859         string thisOutputDir = outputDir;
860         if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
861         string filename = outputDir + "get.seqs.debug.report";
862         
863         ofstream out;
864                 m->openOutputFile(filename, out); 
865
866        
867         //compare fasta, name, qual and taxonomy if given to make sure they contain the same seqs
868         if (fastafile != "") {
869             if (namefile != "") { //compare with fasta
870                 if (sanity["fasta"] != sanity["name"]) { //create mismatch file
871                     createMisMatchFile(out, fastafile, namefile, sanity["fasta"], sanity["name"]);
872                 }
873             }
874             if (qualfile != "") {
875                 if (sanity["fasta"] != sanity["qual"]) { //create mismatch file
876                     createMisMatchFile(out, fastafile, qualfile, sanity["fasta"], sanity["qual"]);
877                 }
878             }
879             if (taxfile != "") {
880                 if (sanity["fasta"] != sanity["tax"]) { //create mismatch file
881                     createMisMatchFile(out, fastafile, taxfile, sanity["fasta"], sanity["tax"]);
882                 }
883             }
884         }
885         
886         //compare dupnames, groups and list if given to make sure they match
887         if (namefile != "") {
888             if (groupfile != "") {
889                 if (sanity["dupname"] != sanity["group"]) { //create mismatch file
890                     createMisMatchFile(out, namefile, groupfile, sanity["dupname"], sanity["group"]);
891                 } 
892             }
893             if (listfile != "") {
894                 if (sanity["dupname"] != sanity["list"]) { //create mismatch file
895                     createMisMatchFile(out, namefile, listfile, sanity["dupname"], sanity["list"]);
896                 } 
897             }
898         }else{
899
900             if ((groupfile != "") && (fastafile != "")) {
901                 if (sanity["fasta"] != sanity["group"]) { //create mismatch file
902                     createMisMatchFile(out, fastafile, groupfile, sanity["fasta"], sanity["group"]);
903                 } 
904             }
905         }
906         
907         out.close();
908         
909         if (m->isBlank(filename)) { m->mothurRemove(filename); }
910         else { m->mothurOut("\n[DEBUG]: " + filename + " contains the file mismatches.\n");outputNames.push_back(filename); outputTypes["debug"].push_back(filename); }
911         
912         return 0;
913     }
914         catch(exception& e) {
915                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
916                 exit(1);
917         }
918 }
919 //**********************************************************************************************************************
920 //just looking at common mistakes. 
921 int GetSeqsCommand::createMisMatchFile(ofstream& out, string filename1, string filename2, set<string> set1, set<string> set2){
922         try {
923         out << "****************************************" << endl << endl;
924         out << "Names unique to " << filename1 << ":\n";
925         
926         //remove names in set1 that are also in set2
927         for (set<string>::iterator it = set1.begin(); it != set1.end();) {
928             string name = *it;
929             
930             if (set2.count(name) == 0)  { out << name << endl;  } //name unique to set1
931             else                        { set2.erase(name);     } //you are in both so erase 
932             set1.erase(it++);
933         }
934         
935         out << "\nNames unique to " << filename2 << ":\n";
936         //output results
937         for (set<string>::iterator it = set2.begin(); it != set2.end(); it++) {  out << *it << endl;  }
938         
939         out << "****************************************" << endl << endl;
940         
941         return 0;
942     }
943         catch(exception& e) {
944                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
945                 exit(1);
946         }
947 }
948 //**********************************************************************************************************************
949
950 int GetSeqsCommand::compareAccnos(){
951         try {
952                 
953                 string thisOutputDir = outputDir;
954                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
955                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + "accnos.report";
956                 ofstream out;
957                 m->openOutputFile(outputFileName, out);
958                 
959                 ifstream in;
960                 m->openInputFile(accnosfile2, in);
961                 string name;
962                 
963                 set<string> namesAccnos2;
964                 set<string> namesDups;
965                 set<string> namesAccnos = names;
966                 
967                 map<string, int> nameCount;
968                 
969                 if (namefile != "") {
970                         ifstream inName;
971                         m->openInputFile(namefile, inName);
972                         
973                         
974                         while(!inName.eof()){
975                                 
976                                 if (m->control_pressed) { inName.close(); return 0; }
977                                 
978                                 string thisname, repnames;
979                                 
980                                 inName >> thisname;             m->gobble(inName);              //read from first column
981                                 inName >> repnames;                     //read from second column
982                                 
983                                 int num = m->getNumNames(repnames);
984                                 nameCount[thisname] = num;
985                                 
986                                 m->gobble(inName);
987                         }
988                         inName.close(); 
989                 }
990                 
991                 while(!in.eof()){
992                         in >> name;
993                         
994                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
995                                 int pos = name.find_last_of('_');
996                                 string tempName = name;
997                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
998                                 if (namesAccnos.count(tempName) == 0){
999                                         namesAccnos2.insert(name);
1000                                 }else { //you are in both so erase
1001                                         namesAccnos.erase(name);
1002                                         namesDups.insert(name);
1003                                 }
1004                         }else { //you are in both so erase
1005                                 namesAccnos.erase(name);
1006                                 namesDups.insert(name);
1007                         }
1008                         
1009                         m->gobble(in);
1010                 }
1011                 in.close();     
1012                 
1013                 out << "Names in both files : " + toString(namesDups.size()) << endl;
1014                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
1015                 
1016                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
1017                         out << (*it);
1018                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1019                         out << endl;
1020                 }
1021                 
1022                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
1023                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
1024                 
1025                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
1026                         out << (*it);
1027                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1028                         out << endl;
1029                 }
1030                 
1031                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
1032                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
1033                 
1034                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
1035                         out << (*it);
1036                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1037                         out << endl;
1038                 }
1039
1040                 out.close(); 
1041                 
1042                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
1043                 
1044                 return 0;
1045                 
1046         }
1047         catch(exception& e) {
1048                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
1049                 exit(1);
1050         }
1051 }
1052
1053
1054 //**********************************************************************************************************************
1055