]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
Revert to previous commit
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetSeqsCommand::setParameters(){ 
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
24                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos2);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "GetSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string GetSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
44                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
45                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
46                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
48                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
49                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57
58 //**********************************************************************************************************************
59 GetSeqsCommand::GetSeqsCommand(){       
60         try {
61                 abort = true; calledHelp = true;
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["fasta"] = tempOutNames;
65                 outputTypes["taxonomy"] = tempOutNames;
66                 outputTypes["name"] = tempOutNames;
67                 outputTypes["group"] = tempOutNames;
68                 outputTypes["alignreport"] = tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["qfile"] = tempOutNames;
71                 outputTypes["accnosreport"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 GetSeqsCommand::GetSeqsCommand(string option)  {
80         try {
81                 abort = false; calledHelp = false;   
82                                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string,string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                         
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["fasta"] = tempOutNames;
104                         outputTypes["taxonomy"] = tempOutNames;
105                         outputTypes["name"] = tempOutNames;
106                         outputTypes["group"] = tempOutNames;
107                         outputTypes["alignreport"] = tempOutNames;
108                         outputTypes["list"] = tempOutNames;
109                         outputTypes["qfile"] = tempOutNames;
110                         outputTypes["accnosreport"] = tempOutNames;
111                         
112                         //if the user changes the output directory command factory will send this info to us in the output parameter 
113                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("alignreport");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
126                                 }
127                                 
128                                 it = parameters.find("fasta");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("accnos");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
142                                 }
143                                 
144                                 it = parameters.find("accnos2");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
150                                 }
151                                 
152                                 it = parameters.find("list");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167                                 
168                                 it = parameters.find("group");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
174                                 }
175                                 
176                                 it = parameters.find("taxonomy");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
182                                 }
183                                 
184                                 it = parameters.find("qfile");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
190                                 }
191                         }
192
193                         
194                         //check for required parameters
195                         accnosfile = validParameter.validFile(parameters, "accnos", true);
196                         if (accnosfile == "not open") { abort = true; }
197                         else if (accnosfile == "not found") {  
198                                 accnosfile = m->getAccnosFile(); 
199                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
200                                 else { 
201                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
202                                         abort = true;
203                                 } 
204                         }else { m->setAccnosFile(accnosfile); } 
205                         
206                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
207                         
208                         fastafile = validParameter.validFile(parameters, "fasta", true);
209                         if (fastafile == "not open") { fastafile = ""; abort = true; }
210                         else if (fastafile == "not found") {  fastafile = "";  }
211                         else { m->setFastaFile(fastafile); }
212                         
213                         namefile = validParameter.validFile(parameters, "name", true);
214                         if (namefile == "not open") { namefile = ""; abort = true; }
215                         else if (namefile == "not found") {  namefile = "";  }  
216                         else { m->setNameFile(namefile); }
217                         
218                         groupfile = validParameter.validFile(parameters, "group", true);
219                         if (groupfile == "not open") { abort = true; }
220                         else if (groupfile == "not found") {  groupfile = "";  }        
221                         else { m->setGroupFile(groupfile); }
222                         
223                         alignfile = validParameter.validFile(parameters, "alignreport", true);
224                         if (alignfile == "not open") { abort = true; }
225                         else if (alignfile == "not found") {  alignfile = "";  }
226                         
227                         listfile = validParameter.validFile(parameters, "list", true);
228                         if (listfile == "not open") { abort = true; }
229                         else if (listfile == "not found") {  listfile = "";  }
230                         else { m->setListFile(listfile); }
231                         
232                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
233                         if (taxfile == "not open") { taxfile = ""; abort = true; }
234                         else if (taxfile == "not found") {  taxfile = "";  }
235                         else { m->setTaxonomyFile(taxfile); }
236                         
237                         qualfile = validParameter.validFile(parameters, "qfile", true);
238                         if (qualfile == "not open") { abort = true; }
239                         else if (qualfile == "not found") {  qualfile = "";  }
240                         else { m->setQualFile(qualfile); }
241                         
242                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
243                         if (accnosfile2 == "not open") { abort = true; }
244                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
245                         
246                         
247                         string usedDups = "true";
248                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
249                         dups = m->isTrue(temp);
250                         
251                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
252                 
253                         if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
254                                 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
255                                 parser.getNameFile(files);
256                         }
257                 }
258
259         }
260         catch(exception& e) {
261                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
262                 exit(1);
263         }
264 }
265 //**********************************************************************************************************************
266
267 int GetSeqsCommand::execute(){
268         try {
269                 
270                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
271                 
272                 //get names you want to keep
273                 readAccnos();
274                 
275                 if (m->control_pressed) { return 0; }
276                 
277                 //read through the correct file and output lines you want to keep
278                 if (namefile != "")                     {               readName();                     }
279                 if (fastafile != "")            {               readFasta();            }
280                 if (groupfile != "")            {               readGroup();            }
281                 if (alignfile != "")            {               readAlign();            }
282                 if (listfile != "")                     {               readList();                     }
283                 if (taxfile != "")                      {               readTax();                      }
284                 if (qualfile != "")                     {               readQual();                     }
285                 if (accnosfile2 != "")          {               compareAccnos();        }
286                 
287                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
288                 
289                 
290                 if (outputNames.size() != 0) {
291                         m->mothurOutEndLine();
292                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
293                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
294                         m->mothurOutEndLine();
295                         
296                         //set fasta file as new current fastafile
297                         string current = "";
298                         itTypes = outputTypes.find("fasta");
299                         if (itTypes != outputTypes.end()) {
300                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
301                         }
302                         
303                         itTypes = outputTypes.find("name");
304                         if (itTypes != outputTypes.end()) {
305                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
306                         }
307                         
308                         itTypes = outputTypes.find("group");
309                         if (itTypes != outputTypes.end()) {
310                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
311                         }
312                         
313                         itTypes = outputTypes.find("list");
314                         if (itTypes != outputTypes.end()) {
315                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
316                         }
317                         
318                         itTypes = outputTypes.find("taxonomy");
319                         if (itTypes != outputTypes.end()) {
320                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
321                         }
322                         
323                         itTypes = outputTypes.find("qfile");
324                         if (itTypes != outputTypes.end()) {
325                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
326                         }
327                         
328                 }
329                 
330                 return 0;               
331         }
332
333         catch(exception& e) {
334                 m->errorOut(e, "GetSeqsCommand", "execute");
335                 exit(1);
336         }
337 }
338
339 //**********************************************************************************************************************
340 int GetSeqsCommand::readFasta(){
341         try {
342                 string thisOutputDir = outputDir;
343                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
344                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" +  m->getExtension(fastafile);
345                 ofstream out;
346                 m->openOutputFile(outputFileName, out);
347                 
348                 
349                 ifstream in;
350                 m->openInputFile(fastafile, in);
351                 string name;
352                 
353                 bool wroteSomething = false;
354                 int selectedCount = 0;
355                 
356                 while(!in.eof()){
357                 
358                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
359                         
360                         Sequence currSeq(in);
361                         name = currSeq.getName();
362                         
363                         if (name != "") {
364                                 //if this name is in the accnos file
365                                 if (names.count(name) != 0) {
366                                         wroteSomething = true;
367                                         
368                                         currSeq.printSequence(out);
369                                         selectedCount++;
370                                 }
371                         }
372                         m->gobble(in);
373                 }
374                 in.close();     
375                 out.close();
376                 
377                 
378                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
379                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
380                 
381                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
382                 
383                 return 0;
384
385         }
386         catch(exception& e) {
387                 m->errorOut(e, "GetSeqsCommand", "readFasta");
388                 exit(1);
389         }
390 }
391 //**********************************************************************************************************************
392 int GetSeqsCommand::readQual(){
393         try {
394                 string thisOutputDir = outputDir;
395                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
396                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
397                 ofstream out;
398                 m->openOutputFile(outputFileName, out);
399                 
400                 
401                 ifstream in;
402                 m->openInputFile(qualfile, in);
403                 string name;
404                 
405                 bool wroteSomething = false;
406                 int selectedCount = 0;
407                 
408                 
409                 while(!in.eof()){       
410                         string saveName = "";
411                         string name = "";
412                         string scores = "";
413                         
414                         in >> name; 
415                                 
416                         if (name.length() != 0) { 
417                                 saveName = name.substr(1);
418                                 while (!in.eof())       {       
419                                         char c = in.get(); 
420                                         if (c == 10 || c == 13){        break;  }
421                                         else { name += c; }     
422                                 } 
423                                 m->gobble(in);
424                         }
425                         
426                         while(in){
427                                 char letter= in.get();
428                                 if(letter == '>'){      in.putback(letter);     break;  }
429                                 else{ scores += letter; }
430                         }
431                         
432                         m->gobble(in);
433                         
434                         if (names.count(saveName) != 0) {
435                                 wroteSomething = true;
436                                                 
437                                 out << name << endl << scores;
438                                 selectedCount++;
439                         }
440                         
441                         m->gobble(in);
442                 }
443                 in.close();
444                 out.close();
445                 
446                 
447                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
448                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
449                 
450                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your quality file."); m->mothurOutEndLine();
451
452                 
453                 return 0;
454                 
455         }
456         catch(exception& e) {
457                 m->errorOut(e, "GetSeqsCommand", "readQual");
458                 exit(1);
459         }
460 }
461 //**********************************************************************************************************************
462 int GetSeqsCommand::readList(){
463         try {
464                 string thisOutputDir = outputDir;
465                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
466                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
467                 ofstream out;
468                 m->openOutputFile(outputFileName, out);
469                 
470                 ifstream in;
471                 m->openInputFile(listfile, in);
472                 
473                 bool wroteSomething = false;
474                 int selectedCount = 0;
475                 
476                 while(!in.eof()){
477                         
478                         selectedCount = 0;
479                         
480                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
481
482                         //read in list vector
483                         ListVector list(in);
484                         
485                         //make a new list vector
486                         ListVector newList;
487                         newList.setLabel(list.getLabel());
488                         
489                         //for each bin
490                         for (int i = 0; i < list.getNumBins(); i++) {
491                         
492                                 //parse out names that are in accnos file
493                                 string binnames = list.get(i);
494                                 
495                                 string newNames = "";
496                                 while (binnames.find_first_of(',') != -1) { 
497                                         string name = binnames.substr(0,binnames.find_first_of(','));
498                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
499                                         
500                                         //if that name is in the .accnos file, add it
501                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; }
502                                 }
503                         
504                                 //get last name
505                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++; }
506
507                                 //if there are names in this bin add to new list
508                                 if (newNames != "") { 
509                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
510                                         newList.push_back(newNames);    
511                                 }
512                         }
513                                 
514                         //print new listvector
515                         if (newList.getNumBins() != 0) {
516                                 wroteSomething = true;
517                                 newList.print(out);
518                         }
519                         
520                         m->gobble(in);
521                 }
522                 in.close();     
523                 out.close();
524                 
525                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
526                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
527                 
528                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
529                 
530                 return 0;
531
532         }
533         catch(exception& e) {
534                 m->errorOut(e, "GetSeqsCommand", "readList");
535                 exit(1);
536         }
537 }
538 //**********************************************************************************************************************
539 int GetSeqsCommand::readName(){
540         try {
541                 string thisOutputDir = outputDir;
542                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
543                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" +  m->getExtension(namefile);
544                 ofstream out;
545                 m->openOutputFile(outputFileName, out);
546                 
547
548                 ifstream in;
549                 m->openInputFile(namefile, in);
550                 string name, firstCol, secondCol;
551                 
552                 bool wroteSomething = false;
553                 int selectedCount = 0;
554                 
555                 while(!in.eof()){
556                 
557                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
558
559                         in >> firstCol;                         
560                         in >> secondCol;
561                         
562                         string hold = "";
563                         if (dups) { hold = secondCol; }
564                         
565                         vector<string> parsedNames;
566                         m->splitAtComma(secondCol, parsedNames);
567                         
568                         vector<string> validSecond;
569                         for (int i = 0; i < parsedNames.size(); i++) {
570                                 if (names.count(parsedNames[i]) != 0) {
571                                         validSecond.push_back(parsedNames[i]);
572                                 }
573                         }
574
575                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
576                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
577                                 out << firstCol << '\t' << hold << endl;
578                                 wroteSomething = true;
579                                 selectedCount += parsedNames.size();
580                         }else {
581                                 selectedCount += validSecond.size();
582                                 
583                                 //if the name in the first column is in the set then print it and any other names in second column also in set
584                                 if (names.count(firstCol) != 0) {
585                                 
586                                         wroteSomething = true;
587                                         
588                                         out << firstCol << '\t';
589                                         
590                                         //you know you have at least one valid second since first column is valid
591                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
592                                         out << validSecond[validSecond.size()-1] << endl;
593                                         
594                                 
595                                 //make first name in set you come to first column and then add the remaining names to second column
596                                 }else {
597                                         //you want part of this row
598                                         if (validSecond.size() != 0) {
599                                         
600                                                 wroteSomething = true;
601                                                 
602                                                 out << validSecond[0] << '\t';
603                                         
604                                                 //you know you have at least one valid second since first column is valid
605                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
606                                                 out << validSecond[validSecond.size()-1] << endl;
607                                         }
608                                 }
609                         }
610                         m->gobble(in);
611                 }
612                 in.close();
613                 out.close();
614                 
615                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
616                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
617                 
618                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
619                 
620                 return 0;
621                 
622         }
623         catch(exception& e) {
624                 m->errorOut(e, "GetSeqsCommand", "readName");
625                 exit(1);
626         }
627 }
628
629 //**********************************************************************************************************************
630 int GetSeqsCommand::readGroup(){
631         try {
632                 string thisOutputDir = outputDir;
633                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
634                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
635                 ofstream out;
636                 m->openOutputFile(outputFileName, out);
637                 
638
639                 ifstream in;
640                 m->openInputFile(groupfile, in);
641                 string name, group;
642                 
643                 bool wroteSomething = false;
644                 int selectedCount = 0;
645                 
646                 while(!in.eof()){
647
648                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
649
650
651                         in >> name;                             //read from first column
652                         in >> group;                    //read from second column
653                         
654                         //if this name is in the accnos file
655                         if (names.count(name) != 0) {
656                                 wroteSomething = true;
657                                 
658                                 out << name << '\t' << group << endl;
659                                 selectedCount++;
660                         }
661                                         
662                         m->gobble(in);
663                 }
664                 in.close();
665                 out.close();
666                 
667                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
668                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
669                 
670                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
671
672                 
673                 return 0;
674
675         }
676         catch(exception& e) {
677                 m->errorOut(e, "GetSeqsCommand", "readGroup");
678                 exit(1);
679         }
680 }
681 //**********************************************************************************************************************
682 int GetSeqsCommand::readTax(){
683         try {
684                 string thisOutputDir = outputDir;
685                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
686                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
687                 ofstream out;
688                 m->openOutputFile(outputFileName, out);
689                 
690                 ifstream in;
691                 m->openInputFile(taxfile, in);
692                 string name, tax;
693                 
694                 bool wroteSomething = false;
695                 int selectedCount = 0;
696                 
697                 while(!in.eof()){
698
699                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
700
701                         in >> name;                             //read from first column
702                         in >> tax;                      //read from second column
703                         
704                         //if this name is in the accnos file
705                         if (names.count(name) != 0) {
706                                 wroteSomething = true;
707                                 
708                                 out << name << '\t' << tax << endl;
709                                 selectedCount++;
710                         }
711                                         
712                         m->gobble(in);
713                 }
714                 in.close();
715                 out.close();
716                 
717                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
718                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
719                 
720                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
721                         
722                 return 0;
723
724         }
725         catch(exception& e) {
726                 m->errorOut(e, "GetSeqsCommand", "readTax");
727                 exit(1);
728         }
729 }
730 //**********************************************************************************************************************
731 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
732 int GetSeqsCommand::readAlign(){
733         try {
734                 string thisOutputDir = outputDir;
735                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
736                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
737                 ofstream out;
738                 m->openOutputFile(outputFileName, out);
739                 
740
741                 ifstream in;
742                 m->openInputFile(alignfile, in);
743                 string name, junk;
744                 
745                 bool wroteSomething = false;
746                 int selectedCount = 0;
747                 
748                 //read column headers
749                 for (int i = 0; i < 16; i++) {  
750                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
751                         else                    {       break;                  }
752                 }
753                 out << endl;
754                 
755                 while(!in.eof()){
756                 
757                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
758
759
760                         in >> name;                             //read from first column
761                         
762                         //if this name is in the accnos file
763                         if (names.count(name) != 0) {
764                                 wroteSomething = true;
765                                 selectedCount++;
766                                 
767                                 out << name << '\t';
768                                 
769                                 //read rest
770                                 for (int i = 0; i < 15; i++) {  
771                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
772                                         else                    {       break;                  }
773                                 }
774                                 out << endl;
775                                 
776                         }else {//still read just don't do anything with it
777                                 //read rest
778                                 for (int i = 0; i < 15; i++) {  
779                                         if (!in.eof())  {       in >> junk;             }
780                                         else                    {       break;                  }
781                                 }
782                         }
783                         
784                         m->gobble(in);
785                 }
786                 in.close();
787                 out.close();
788                 
789                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
790                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
791                 
792                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
793                 
794                 return 0;
795                 
796         }
797         catch(exception& e) {
798                 m->errorOut(e, "GetSeqsCommand", "readAlign");
799                 exit(1);
800         }
801 }
802 //**********************************************************************************************************************
803
804 int GetSeqsCommand::readAccnos(){
805         try {
806                 
807                 ifstream in;
808                 m->openInputFile(accnosfile, in);
809                 string name;
810                 
811                 while(!in.eof()){
812                         in >> name;
813                                                 
814                         names.insert(name);
815                         
816                         m->gobble(in);
817                 }
818                 in.close();     
819                 
820                 return 0;
821
822         }
823         catch(exception& e) {
824                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
825                 exit(1);
826         }
827 }
828 //**********************************************************************************************************************
829
830 int GetSeqsCommand::compareAccnos(){
831         try {
832                 
833                 string thisOutputDir = outputDir;
834                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
835                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + "accnos.report";
836                 ofstream out;
837                 m->openOutputFile(outputFileName, out);
838                 
839                 ifstream in;
840                 m->openInputFile(accnosfile2, in);
841                 string name;
842                 
843                 set<string> namesAccnos2;
844                 set<string> namesDups;
845                 set<string> namesAccnos = names;
846                 
847                 map<string, int> nameCount;
848                 
849                 if (namefile != "") {
850                         ifstream inName;
851                         m->openInputFile(namefile, inName);
852                         
853                         
854                         while(!inName.eof()){
855                                 
856                                 if (m->control_pressed) { inName.close(); return 0; }
857                                 
858                                 string thisname, repnames;
859                                 
860                                 inName >> thisname;             m->gobble(inName);              //read from first column
861                                 inName >> repnames;                     //read from second column
862                                 
863                                 int num = m->getNumNames(repnames);
864                                 nameCount[thisname] = num;
865                                 
866                                 m->gobble(inName);
867                         }
868                         inName.close(); 
869                 }
870                 
871                 while(!in.eof()){
872                         in >> name;
873                         
874                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
875                                 int pos = name.find_last_of('_');
876                                 string tempName = name;
877                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
878                                 if (namesAccnos.count(tempName) == 0){
879                                         namesAccnos2.insert(name);
880                                 }else { //you are in both so erase
881                                         namesAccnos.erase(name);
882                                         namesDups.insert(name);
883                                 }
884                         }else { //you are in both so erase
885                                 namesAccnos.erase(name);
886                                 namesDups.insert(name);
887                         }
888                         
889                         m->gobble(in);
890                 }
891                 in.close();     
892                 
893                 out << "Names in both files : " + toString(namesDups.size()) << endl;
894                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
895                 
896                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
897                         out << (*it);
898                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
899                         out << endl;
900                 }
901                 
902                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
903                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
904                 
905                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
906                         out << (*it);
907                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
908                         out << endl;
909                 }
910                 
911                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
912                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
913                 
914                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
915                         out << (*it);
916                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
917                         out << endl;
918                 }
919
920                 out.close(); 
921                 
922                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
923                 
924                 return 0;
925                 
926         }
927         catch(exception& e) {
928                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
929                 exit(1);
930         }
931 }
932
933
934 //**********************************************************************************************************************
935