]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
9f2b5060fcaa715f9a2fd3fc484ba585d6322da9
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetSeqsCommand::getValidParameters(){    
16         try {
17                 string Array[] =  {"fasta","name", "group", "qfile","alignreport", "accnos", "accnos2","dups", "list","taxonomy","outputdir","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "GetSeqsCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 GetSeqsCommand::GetSeqsCommand(){       
28         try {
29                 abort = true;
30                 //initialize outputTypes
31                 vector<string> tempOutNames;
32                 outputTypes["fasta"] = tempOutNames;
33                 outputTypes["taxonomy"] = tempOutNames;
34                 outputTypes["name"] = tempOutNames;
35                 outputTypes["group"] = tempOutNames;
36                 outputTypes["alignreport"] = tempOutNames;
37                 outputTypes["list"] = tempOutNames;
38                 outputTypes["qfile"] = tempOutNames;
39                 outputTypes["accnosreport"] = tempOutNames;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
43                 exit(1);
44         }
45 }
46 //**********************************************************************************************************************
47 vector<string> GetSeqsCommand::getRequiredParameters(){ 
48         try {
49                 string Array[] =  {"accnos"};
50                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
51                 return myArray;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "GetSeqsCommand", "getRequiredParameters");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 vector<string> GetSeqsCommand::getRequiredFiles(){      
60         try {
61                 vector<string> myArray;
62                 return myArray;
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "GetSeqsCommand", "getRequiredFiles");
66                 exit(1);
67         }
68 }
69 //**********************************************************************************************************************
70 GetSeqsCommand::GetSeqsCommand(string option)  {
71         try {
72                 abort = false;
73                                 
74                 //allow user to run help
75                 if(option == "help") { help(); abort = true; }
76                 
77                 else {
78                         //valid paramters for this command
79                         string Array[] =  {"fasta","name", "group", "alignreport", "qfile", "accnos", "accnos2","dups", "list","taxonomy","outputdir","inputdir"};
80                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
81                         
82                         OptionParser parser(option);
83                         map<string,string> parameters = parser.getParameters();
84                         
85                         ValidParameters validParameter;
86                         map<string,string>::iterator it;
87                         
88                         //check to make sure all parameters are valid for command
89                         for (it = parameters.begin(); it != parameters.end(); it++) { 
90                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
91                         }
92                         
93                         //initialize outputTypes
94                         vector<string> tempOutNames;
95                         outputTypes["fasta"] = tempOutNames;
96                         outputTypes["taxonomy"] = tempOutNames;
97                         outputTypes["name"] = tempOutNames;
98                         outputTypes["group"] = tempOutNames;
99                         outputTypes["alignreport"] = tempOutNames;
100                         outputTypes["list"] = tempOutNames;
101                         outputTypes["qfile"] = tempOutNames;
102                         outputTypes["accnosreport"] = tempOutNames;
103                         
104                         //if the user changes the output directory command factory will send this info to us in the output parameter 
105                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
106                         
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("alignreport");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
118                                 }
119                                 
120                                 it = parameters.find("fasta");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
126                                 }
127                                 
128                                 it = parameters.find("accnos");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
134                                 }
135                                 
136                                 it = parameters.find("accnos2");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
142                                 }
143                                 
144                                 it = parameters.find("list");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
150                                 }
151                                 
152                                 it = parameters.find("name");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
158                                 }
159                                 
160                                 it = parameters.find("group");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
166                                 }
167                                 
168                                 it = parameters.find("taxonomy");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
174                                 }
175                                 
176                                 it = parameters.find("qfile");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
182                                 }
183                         }
184
185                         
186                         //check for required parameters
187                         accnosfile = validParameter.validFile(parameters, "accnos", true);
188                         if (accnosfile == "not open") { abort = true; }
189                         else if (accnosfile == "not found") {  accnosfile = "";  m->mothurOut("You must provide an accnos file."); m->mothurOutEndLine(); abort = true; }       
190                         
191                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
192                         
193                         fastafile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastafile == "not open") { abort = true; }
195                         else if (fastafile == "not found") {  fastafile = "";  }        
196                         
197                         namefile = validParameter.validFile(parameters, "name", true);
198                         if (namefile == "not open") { abort = true; }
199                         else if (namefile == "not found") {  namefile = "";  }  
200                         
201                         groupfile = validParameter.validFile(parameters, "group", true);
202                         if (groupfile == "not open") { abort = true; }
203                         else if (groupfile == "not found") {  groupfile = "";  }        
204                         
205                         alignfile = validParameter.validFile(parameters, "alignreport", true);
206                         if (alignfile == "not open") { abort = true; }
207                         else if (alignfile == "not found") {  alignfile = "";  }
208                         
209                         listfile = validParameter.validFile(parameters, "list", true);
210                         if (listfile == "not open") { abort = true; }
211                         else if (listfile == "not found") {  listfile = "";  }
212                         
213                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
214                         if (taxfile == "not open") { abort = true; }
215                         else if (taxfile == "not found") {  taxfile = "";  }
216                         
217                         qualfile = validParameter.validFile(parameters, "qfile", true);
218                         if (qualfile == "not open") { abort = true; }
219                         else if (qualfile == "not found") {  qualfile = "";  }
220                         
221                         string usedDups = "true";
222                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "false"; usedDups = ""; }
223                         dups = m->isTrue(temp);
224                         
225                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
226                 
227                         if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
228
229                 }
230
231         }
232         catch(exception& e) {
233                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
234                 exit(1);
235         }
236 }
237 //**********************************************************************************************************************
238
239 void GetSeqsCommand::help(){
240         try {
241                 m->mothurOut("The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n");
242                 m->mothurOut("It outputs a file containing only the sequences in the .accnos file.\n");
243                 m->mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos and at least one of the other parameters.\n");
244                 m->mothurOut("The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n");
245                 m->mothurOut("The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
246                 m->mothurOut("Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
247                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "GetSeqsCommand", "help");
251                 exit(1);
252         }
253 }
254
255 //**********************************************************************************************************************
256
257 int GetSeqsCommand::execute(){
258         try {
259                 
260                 if (abort == true) { return 0; }
261                 
262                 //get names you want to keep
263                 readAccnos();
264                 
265                 if (m->control_pressed) { return 0; }
266                 
267                 //read through the correct file and output lines you want to keep
268                 if (namefile != "")                     {               readName();                     }
269                 if (fastafile != "")            {               readFasta();            }
270                 if (groupfile != "")            {               readGroup();            }
271                 if (alignfile != "")            {               readAlign();            }
272                 if (listfile != "")                     {               readList();                     }
273                 if (taxfile != "")                      {               readTax();                      }
274                 if (qualfile != "")                     {               readQual();                     }
275                 if (accnosfile2 != "")          {               compareAccnos();        }
276                 
277                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } return 0; }
278                 
279                 m->mothurOut("Selected " + toString(names.size()) + " sequences."); m->mothurOutEndLine();
280                 
281                 if (outputNames.size() != 0) {
282                         m->mothurOutEndLine();
283                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
284                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
285                         m->mothurOutEndLine();
286                 }
287                 
288                 return 0;               
289         }
290
291         catch(exception& e) {
292                 m->errorOut(e, "GetSeqsCommand", "execute");
293                 exit(1);
294         }
295 }
296
297 //**********************************************************************************************************************
298 int GetSeqsCommand::readFasta(){
299         try {
300                 string thisOutputDir = outputDir;
301                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
302                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" +  m->getExtension(fastafile);
303                 ofstream out;
304                 m->openOutputFile(outputFileName, out);
305                 
306                 
307                 ifstream in;
308                 m->openInputFile(fastafile, in);
309                 string name;
310                 
311                 bool wroteSomething = false;
312                 
313                 while(!in.eof()){
314                 
315                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
316                         
317                         Sequence currSeq(in);
318                         name = currSeq.getName();
319                         
320                         if (name != "") {
321                                 //if this name is in the accnos file
322                                 if (names.count(name) != 0) {
323                                         wroteSomething = true;
324                                         
325                                         currSeq.printSequence(out);
326                                 }
327                         }
328                         m->gobble(in);
329                 }
330                 in.close();     
331                 out.close();
332                 
333                 
334                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
335                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
336                 
337                 return 0;
338
339         }
340         catch(exception& e) {
341                 m->errorOut(e, "GetSeqsCommand", "readFasta");
342                 exit(1);
343         }
344 }
345 //**********************************************************************************************************************
346 int GetSeqsCommand::readQual(){
347         try {
348                 string thisOutputDir = outputDir;
349                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
350                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
351                 ofstream out;
352                 m->openOutputFile(outputFileName, out);
353                 
354                 
355                 ifstream in;
356                 m->openInputFile(qualfile, in);
357                 string name;
358                 
359                 bool wroteSomething = false;
360                 
361                 
362                 while(!in.eof()){       
363                         string saveName = "";
364                         string name = "";
365                         string scores = "";
366                         
367                         in >> name; 
368                                 
369                         if (name.length() != 0) { 
370                                 saveName = name.substr(1);
371                                 while (!in.eof())       {       
372                                         char c = in.get(); 
373                                         if (c == 10 || c == 13){        break;  }
374                                         else { name += c; }     
375                                 } 
376                                 m->gobble(in);
377                         }
378                         
379                         while(in){
380                                 char letter= in.get();
381                                 if(letter == '>'){      in.putback(letter);     break;  }
382                                 else{ scores += letter; }
383                         }
384                         
385                         m->gobble(in);
386                         
387                         if (names.count(saveName) != 0) {
388                                 wroteSomething = true;
389                                                 
390                                 out << name << endl << scores;
391                         }
392                         
393                         m->gobble(in);
394                 }
395                 in.close();
396                 out.close();
397                 
398                 
399                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
400                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
401                 
402                 return 0;
403                 
404         }
405         catch(exception& e) {
406                 m->errorOut(e, "GetSeqsCommand", "readQual");
407                 exit(1);
408         }
409 }
410 //**********************************************************************************************************************
411 int GetSeqsCommand::readList(){
412         try {
413                 string thisOutputDir = outputDir;
414                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
415                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
416                 ofstream out;
417                 m->openOutputFile(outputFileName, out);
418                 
419                 ifstream in;
420                 m->openInputFile(listfile, in);
421                 
422                 bool wroteSomething = false;
423                 
424                 while(!in.eof()){
425                         
426                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
427
428                         //read in list vector
429                         ListVector list(in);
430                         
431                         //make a new list vector
432                         ListVector newList;
433                         newList.setLabel(list.getLabel());
434                         
435                         //for each bin
436                         for (int i = 0; i < list.getNumBins(); i++) {
437                         
438                                 //parse out names that are in accnos file
439                                 string binnames = list.get(i);
440                                 
441                                 string newNames = "";
442                                 while (binnames.find_first_of(',') != -1) { 
443                                         string name = binnames.substr(0,binnames.find_first_of(','));
444                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
445                                         
446                                         //if that name is in the .accnos file, add it
447                                         if (names.count(name) != 0) {  newNames += name + ",";  }
448                                 }
449                         
450                                 //get last name
451                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  }
452
453                                 //if there are names in this bin add to new list
454                                 if (newNames != "") { 
455                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
456                                         newList.push_back(newNames);    
457                                 }
458                         }
459                                 
460                         //print new listvector
461                         if (newList.getNumBins() != 0) {
462                                 wroteSomething = true;
463                                 newList.print(out);
464                         }
465                         
466                         m->gobble(in);
467                 }
468                 in.close();     
469                 out.close();
470                 
471                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
472                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
473                 
474                 return 0;
475
476         }
477         catch(exception& e) {
478                 m->errorOut(e, "GetSeqsCommand", "readList");
479                 exit(1);
480         }
481 }
482 //**********************************************************************************************************************
483 int GetSeqsCommand::readName(){
484         try {
485                 string thisOutputDir = outputDir;
486                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
487                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" +  m->getExtension(namefile);
488                 ofstream out;
489                 m->openOutputFile(outputFileName, out);
490                 
491
492                 ifstream in;
493                 m->openInputFile(namefile, in);
494                 string name, firstCol, secondCol;
495                 
496                 bool wroteSomething = false;
497                 
498                 
499                 while(!in.eof()){
500                 
501                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
502
503                         in >> firstCol;                         
504                         in >> secondCol;
505                         
506                         string hold = "";
507                         if (dups) { hold = secondCol; }
508                         
509                         vector<string> parsedNames;
510                         //parse second column saving each name
511                         while (secondCol.find_first_of(',') != -1) { 
512                                 name = secondCol.substr(0,secondCol.find_first_of(','));
513                                 secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
514                                 parsedNames.push_back(name);
515                         }
516                         
517                         //get name after last ,
518                         parsedNames.push_back(secondCol);
519                         
520                         vector<string> validSecond;
521                         for (int i = 0; i < parsedNames.size(); i++) {
522                                 if (names.count(parsedNames[i]) != 0) {
523                                         validSecond.push_back(parsedNames[i]);
524                                 }
525                         }
526
527                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
528                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
529                                 out << firstCol << '\t' << hold << endl;
530                                 wroteSomething = true;
531                         }else {
532                                 //if the name in the first column is in the set then print it and any other names in second column also in set
533                                 if (names.count(firstCol) != 0) {
534                                 
535                                         wroteSomething = true;
536                                         
537                                         out << firstCol << '\t';
538                                         
539                                         //you know you have at least one valid second since first column is valid
540                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
541                                         out << validSecond[validSecond.size()-1] << endl;
542                                         
543                                 
544                                 //make first name in set you come to first column and then add the remaining names to second column
545                                 }else {
546                                         //you want part of this row
547                                         if (validSecond.size() != 0) {
548                                         
549                                                 wroteSomething = true;
550                                                 
551                                                 out << validSecond[0] << '\t';
552                                         
553                                                 //you know you have at least one valid second since first column is valid
554                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
555                                                 out << validSecond[validSecond.size()-1] << endl;
556                                         }
557                                 }
558                         }
559                         m->gobble(in);
560                 }
561                 in.close();
562                 out.close();
563                 
564                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
565                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
566                 
567                 return 0;
568                 
569         }
570         catch(exception& e) {
571                 m->errorOut(e, "GetSeqsCommand", "readName");
572                 exit(1);
573         }
574 }
575
576 //**********************************************************************************************************************
577 int GetSeqsCommand::readGroup(){
578         try {
579                 string thisOutputDir = outputDir;
580                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
581                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
582                 ofstream out;
583                 m->openOutputFile(outputFileName, out);
584                 
585
586                 ifstream in;
587                 m->openInputFile(groupfile, in);
588                 string name, group;
589                 
590                 bool wroteSomething = false;
591                 
592                 while(!in.eof()){
593
594                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
595
596
597                         in >> name;                             //read from first column
598                         in >> group;                    //read from second column
599                         
600                         //if this name is in the accnos file
601                         if (names.count(name) != 0) {
602                                 wroteSomething = true;
603                                 
604                                 out << name << '\t' << group << endl;
605                         }
606                                         
607                         m->gobble(in);
608                 }
609                 in.close();
610                 out.close();
611                 
612                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
613                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
614                 
615                 return 0;
616
617         }
618         catch(exception& e) {
619                 m->errorOut(e, "GetSeqsCommand", "readGroup");
620                 exit(1);
621         }
622 }
623 //**********************************************************************************************************************
624 int GetSeqsCommand::readTax(){
625         try {
626                 string thisOutputDir = outputDir;
627                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
628                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
629                 ofstream out;
630                 m->openOutputFile(outputFileName, out);
631                 
632                 ifstream in;
633                 m->openInputFile(taxfile, in);
634                 string name, tax;
635                 
636                 bool wroteSomething = false;
637                 
638                 while(!in.eof()){
639
640                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
641
642                         in >> name;                             //read from first column
643                         in >> tax;                      //read from second column
644                         
645                         //if this name is in the accnos file
646                         if (names.count(name) != 0) {
647                                 wroteSomething = true;
648                                 
649                                 out << name << '\t' << tax << endl;
650                         }
651                                         
652                         m->gobble(in);
653                 }
654                 in.close();
655                 out.close();
656                 
657                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
658                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
659                         
660                 return 0;
661
662         }
663         catch(exception& e) {
664                 m->errorOut(e, "GetSeqsCommand", "readTax");
665                 exit(1);
666         }
667 }
668 //**********************************************************************************************************************
669 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
670 int GetSeqsCommand::readAlign(){
671         try {
672                 string thisOutputDir = outputDir;
673                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
674                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
675                 ofstream out;
676                 m->openOutputFile(outputFileName, out);
677                 
678
679                 ifstream in;
680                 m->openInputFile(alignfile, in);
681                 string name, junk;
682                 
683                 bool wroteSomething = false;
684                 
685                 //read column headers
686                 for (int i = 0; i < 16; i++) {  
687                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
688                         else                    {       break;                  }
689                 }
690                 out << endl;
691                 
692                 while(!in.eof()){
693                 
694                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
695
696
697                         in >> name;                             //read from first column
698                         
699                         //if this name is in the accnos file
700                         if (names.count(name) != 0) {
701                                 wroteSomething = true;
702                                 
703                                 out << name << '\t';
704                                 
705                                 //read rest
706                                 for (int i = 0; i < 15; i++) {  
707                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
708                                         else                    {       break;                  }
709                                 }
710                                 out << endl;
711                                 
712                         }else {//still read just don't do anything with it
713                                 //read rest
714                                 for (int i = 0; i < 15; i++) {  
715                                         if (!in.eof())  {       in >> junk;             }
716                                         else                    {       break;                  }
717                                 }
718                         }
719                         
720                         m->gobble(in);
721                 }
722                 in.close();
723                 out.close();
724                 
725                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
726                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
727                 
728                 return 0;
729                 
730         }
731         catch(exception& e) {
732                 m->errorOut(e, "GetSeqsCommand", "readAlign");
733                 exit(1);
734         }
735 }
736 //**********************************************************************************************************************
737
738 int GetSeqsCommand::readAccnos(){
739         try {
740                 
741                 ifstream in;
742                 m->openInputFile(accnosfile, in);
743                 string name;
744                 
745                 while(!in.eof()){
746                         in >> name;
747                                                 
748                         names.insert(name);
749                         
750                         m->gobble(in);
751                 }
752                 in.close();     
753                 
754                 return 0;
755
756         }
757         catch(exception& e) {
758                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
759                 exit(1);
760         }
761 }
762 //**********************************************************************************************************************
763
764 int GetSeqsCommand::compareAccnos(){
765         try {
766                 
767                 string thisOutputDir = outputDir;
768                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
769                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + "accnos.report";
770                 ofstream out;
771                 m->openOutputFile(outputFileName, out);
772                 
773                 ifstream in;
774                 m->openInputFile(accnosfile2, in);
775                 string name;
776                 
777                 set<string> namesAccnos2;
778                 set<string> namesDups;
779                 set<string> namesAccnos = names;
780                 
781                 map<string, int> nameCount;
782                 
783                 if (namefile != "") {
784                         ifstream inName;
785                         m->openInputFile(namefile, inName);
786                         
787                         
788                         while(!inName.eof()){
789                                 
790                                 if (m->control_pressed) { inName.close(); return 0; }
791                                 
792                                 string thisname, repnames;
793                                 
794                                 inName >> thisname;             m->gobble(inName);              //read from first column
795                                 inName >> repnames;                     //read from second column
796                                 
797                                 int num = m->getNumNames(repnames);
798                                 nameCount[thisname] = num;
799                                 
800                                 m->gobble(inName);
801                         }
802                         inName.close(); 
803                 }
804                 
805                 while(!in.eof()){
806                         in >> name;
807                         
808                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
809                                 namesAccnos2.insert(name);
810                         }else { //you are in both so erase
811                                 namesAccnos.erase(name);
812                                 namesDups.insert(name);
813                         }
814                         
815                         m->gobble(in);
816                 }
817                 in.close();     
818                 
819                 out << "Names in both files : " + toString(namesDups.size()) << endl;
820                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
821                 
822                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
823                         out << (*it);
824                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
825                         out << endl;
826                 }
827                 
828                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
829                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
830                 
831                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
832                         out << (*it);
833                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
834                         out << endl;
835                 }
836                 
837                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
838                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
839                 
840                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
841                         out << (*it);
842                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
843                         out << endl;
844                 }
845
846                 out.close(); 
847                 
848                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
849                 
850                 return 0;
851                 
852         }
853         catch(exception& e) {
854                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
855                 exit(1);
856         }
857 }
858
859
860 //**********************************************************************************************************************
861