]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
working on current change
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetSeqsCommand::setParameters(){ 
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
24                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos2);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "GetSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string GetSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
44                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
45                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
46                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
48                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
49                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57
58 //**********************************************************************************************************************
59 GetSeqsCommand::GetSeqsCommand(){       
60         try {
61                 abort = true; calledHelp = true;
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["fasta"] = tempOutNames;
65                 outputTypes["taxonomy"] = tempOutNames;
66                 outputTypes["name"] = tempOutNames;
67                 outputTypes["group"] = tempOutNames;
68                 outputTypes["alignreport"] = tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["qfile"] = tempOutNames;
71                 outputTypes["accnosreport"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 GetSeqsCommand::GetSeqsCommand(string option)  {
80         try {
81                 abort = false; calledHelp = false;   
82                                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string,string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                         
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["fasta"] = tempOutNames;
104                         outputTypes["taxonomy"] = tempOutNames;
105                         outputTypes["name"] = tempOutNames;
106                         outputTypes["group"] = tempOutNames;
107                         outputTypes["alignreport"] = tempOutNames;
108                         outputTypes["list"] = tempOutNames;
109                         outputTypes["qfile"] = tempOutNames;
110                         outputTypes["accnosreport"] = tempOutNames;
111                         
112                         //if the user changes the output directory command factory will send this info to us in the output parameter 
113                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("alignreport");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
126                                 }
127                                 
128                                 it = parameters.find("fasta");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("accnos");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
142                                 }
143                                 
144                                 it = parameters.find("accnos2");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
150                                 }
151                                 
152                                 it = parameters.find("list");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167                                 
168                                 it = parameters.find("group");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
174                                 }
175                                 
176                                 it = parameters.find("taxonomy");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
182                                 }
183                                 
184                                 it = parameters.find("qfile");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
190                                 }
191                         }
192
193                         
194                         //check for required parameters
195                         accnosfile = validParameter.validFile(parameters, "accnos", true);
196                         if (accnosfile == "not open") { abort = true; }
197                         else if (accnosfile == "not found") {  
198                                 accnosfile = m->getAccnosFile(); 
199                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
200                                 else { 
201                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
202                                         abort = true;
203                                 } 
204                         }else { m->setAccnosFile(accnosfile); } 
205                         
206                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
207                         
208                         fastafile = validParameter.validFile(parameters, "fasta", true);
209                         if (fastafile == "not open") { abort = true; }
210                         else if (fastafile == "not found") {  fastafile = "";  }
211                         else { m->setFastaFile(fastafile); }
212                         
213                         namefile = validParameter.validFile(parameters, "name", true);
214                         if (namefile == "not open") { abort = true; }
215                         else if (namefile == "not found") {  namefile = "";  }  
216                         else { m->setNameFile(namefile); }
217                         
218                         groupfile = validParameter.validFile(parameters, "group", true);
219                         if (groupfile == "not open") { abort = true; }
220                         else if (groupfile == "not found") {  groupfile = "";  }        
221                         else { m->setGroupFile(groupfile); }
222                         
223                         alignfile = validParameter.validFile(parameters, "alignreport", true);
224                         if (alignfile == "not open") { abort = true; }
225                         else if (alignfile == "not found") {  alignfile = "";  }
226                         
227                         listfile = validParameter.validFile(parameters, "list", true);
228                         if (listfile == "not open") { abort = true; }
229                         else if (listfile == "not found") {  listfile = "";  }
230                         else { m->setListFile(listfile); }
231                         
232                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
233                         if (taxfile == "not open") { abort = true; }
234                         else if (taxfile == "not found") {  taxfile = "";  }
235                         else { m->setTaxonomyFile(taxfile); }
236                         
237                         qualfile = validParameter.validFile(parameters, "qfile", true);
238                         if (qualfile == "not open") { abort = true; }
239                         else if (qualfile == "not found") {  qualfile = "";  }
240                         else { m->setQualFile(qualfile); }
241                         
242                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
243                         if (accnosfile2 == "not open") { abort = true; }
244                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
245                         
246                         
247                         string usedDups = "true";
248                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
249                         dups = m->isTrue(temp);
250                         
251                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
252                 }
253
254         }
255         catch(exception& e) {
256                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
257                 exit(1);
258         }
259 }
260 //**********************************************************************************************************************
261
262 int GetSeqsCommand::execute(){
263         try {
264                 
265                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
266                 
267                 //get names you want to keep
268                 readAccnos();
269                 
270                 if (m->control_pressed) { return 0; }
271                 
272                 //read through the correct file and output lines you want to keep
273                 if (namefile != "")                     {               readName();                     }
274                 if (fastafile != "")            {               readFasta();            }
275                 if (groupfile != "")            {               readGroup();            }
276                 if (alignfile != "")            {               readAlign();            }
277                 if (listfile != "")                     {               readList();                     }
278                 if (taxfile != "")                      {               readTax();                      }
279                 if (qualfile != "")                     {               readQual();                     }
280                 if (accnosfile2 != "")          {               compareAccnos();        }
281                 
282                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } return 0; }
283                 
284                 m->mothurOut("Selected " + toString(names.size()) + " sequences."); m->mothurOutEndLine();
285                 
286                 if (outputNames.size() != 0) {
287                         m->mothurOutEndLine();
288                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
289                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
290                         m->mothurOutEndLine();
291                         
292                         //set fasta file as new current fastafile
293                         string current = "";
294                         itTypes = outputTypes.find("fasta");
295                         if (itTypes != outputTypes.end()) {
296                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
297                         }
298                         
299                         itTypes = outputTypes.find("name");
300                         if (itTypes != outputTypes.end()) {
301                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
302                         }
303                         
304                         itTypes = outputTypes.find("group");
305                         if (itTypes != outputTypes.end()) {
306                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
307                         }
308                         
309                         itTypes = outputTypes.find("list");
310                         if (itTypes != outputTypes.end()) {
311                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
312                         }
313                         
314                         itTypes = outputTypes.find("taxonomy");
315                         if (itTypes != outputTypes.end()) {
316                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
317                         }
318                         
319                         itTypes = outputTypes.find("qfile");
320                         if (itTypes != outputTypes.end()) {
321                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
322                         }
323                         
324                 }
325                 
326                 return 0;               
327         }
328
329         catch(exception& e) {
330                 m->errorOut(e, "GetSeqsCommand", "execute");
331                 exit(1);
332         }
333 }
334
335 //**********************************************************************************************************************
336 int GetSeqsCommand::readFasta(){
337         try {
338                 string thisOutputDir = outputDir;
339                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
340                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" +  m->getExtension(fastafile);
341                 ofstream out;
342                 m->openOutputFile(outputFileName, out);
343                 
344                 
345                 ifstream in;
346                 m->openInputFile(fastafile, in);
347                 string name;
348                 
349                 bool wroteSomething = false;
350                 
351                 while(!in.eof()){
352                 
353                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
354                         
355                         Sequence currSeq(in);
356                         name = currSeq.getName();
357                         
358                         if (name != "") {
359                                 //if this name is in the accnos file
360                                 if (names.count(name) != 0) {
361                                         wroteSomething = true;
362                                         
363                                         currSeq.printSequence(out);
364                                 }
365                         }
366                         m->gobble(in);
367                 }
368                 in.close();     
369                 out.close();
370                 
371                 
372                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
373                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
374                 
375                 return 0;
376
377         }
378         catch(exception& e) {
379                 m->errorOut(e, "GetSeqsCommand", "readFasta");
380                 exit(1);
381         }
382 }
383 //**********************************************************************************************************************
384 int GetSeqsCommand::readQual(){
385         try {
386                 string thisOutputDir = outputDir;
387                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
388                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
389                 ofstream out;
390                 m->openOutputFile(outputFileName, out);
391                 
392                 
393                 ifstream in;
394                 m->openInputFile(qualfile, in);
395                 string name;
396                 
397                 bool wroteSomething = false;
398                 
399                 
400                 while(!in.eof()){       
401                         string saveName = "";
402                         string name = "";
403                         string scores = "";
404                         
405                         in >> name; 
406                                 
407                         if (name.length() != 0) { 
408                                 saveName = name.substr(1);
409                                 while (!in.eof())       {       
410                                         char c = in.get(); 
411                                         if (c == 10 || c == 13){        break;  }
412                                         else { name += c; }     
413                                 } 
414                                 m->gobble(in);
415                         }
416                         
417                         while(in){
418                                 char letter= in.get();
419                                 if(letter == '>'){      in.putback(letter);     break;  }
420                                 else{ scores += letter; }
421                         }
422                         
423                         m->gobble(in);
424                         
425                         if (names.count(saveName) != 0) {
426                                 wroteSomething = true;
427                                                 
428                                 out << name << endl << scores;
429                         }
430                         
431                         m->gobble(in);
432                 }
433                 in.close();
434                 out.close();
435                 
436                 
437                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
438                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
439                 
440                 return 0;
441                 
442         }
443         catch(exception& e) {
444                 m->errorOut(e, "GetSeqsCommand", "readQual");
445                 exit(1);
446         }
447 }
448 //**********************************************************************************************************************
449 int GetSeqsCommand::readList(){
450         try {
451                 string thisOutputDir = outputDir;
452                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
453                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
454                 ofstream out;
455                 m->openOutputFile(outputFileName, out);
456                 
457                 ifstream in;
458                 m->openInputFile(listfile, in);
459                 
460                 bool wroteSomething = false;
461                 
462                 while(!in.eof()){
463                         
464                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
465
466                         //read in list vector
467                         ListVector list(in);
468                         
469                         //make a new list vector
470                         ListVector newList;
471                         newList.setLabel(list.getLabel());
472                         
473                         //for each bin
474                         for (int i = 0; i < list.getNumBins(); i++) {
475                         
476                                 //parse out names that are in accnos file
477                                 string binnames = list.get(i);
478                                 
479                                 string newNames = "";
480                                 while (binnames.find_first_of(',') != -1) { 
481                                         string name = binnames.substr(0,binnames.find_first_of(','));
482                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
483                                         
484                                         //if that name is in the .accnos file, add it
485                                         if (names.count(name) != 0) {  newNames += name + ",";  }
486                                 }
487                         
488                                 //get last name
489                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  }
490
491                                 //if there are names in this bin add to new list
492                                 if (newNames != "") { 
493                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
494                                         newList.push_back(newNames);    
495                                 }
496                         }
497                                 
498                         //print new listvector
499                         if (newList.getNumBins() != 0) {
500                                 wroteSomething = true;
501                                 newList.print(out);
502                         }
503                         
504                         m->gobble(in);
505                 }
506                 in.close();     
507                 out.close();
508                 
509                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
510                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
511                 
512                 return 0;
513
514         }
515         catch(exception& e) {
516                 m->errorOut(e, "GetSeqsCommand", "readList");
517                 exit(1);
518         }
519 }
520 //**********************************************************************************************************************
521 int GetSeqsCommand::readName(){
522         try {
523                 string thisOutputDir = outputDir;
524                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
525                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" +  m->getExtension(namefile);
526                 ofstream out;
527                 m->openOutputFile(outputFileName, out);
528                 
529
530                 ifstream in;
531                 m->openInputFile(namefile, in);
532                 string name, firstCol, secondCol;
533                 
534                 bool wroteSomething = false;
535                 
536                 
537                 while(!in.eof()){
538                 
539                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
540
541                         in >> firstCol;                         
542                         in >> secondCol;
543                         
544                         string hold = "";
545                         if (dups) { hold = secondCol; }
546                         
547                         vector<string> parsedNames;
548                         m->splitAtComma(secondCol, parsedNames);
549                         
550                         vector<string> validSecond;
551                         for (int i = 0; i < parsedNames.size(); i++) {
552                                 if (names.count(parsedNames[i]) != 0) {
553                                         validSecond.push_back(parsedNames[i]);
554                                 }
555                         }
556
557                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
558                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
559                                 out << firstCol << '\t' << hold << endl;
560                                 wroteSomething = true;
561                         }else {
562                                 //if the name in the first column is in the set then print it and any other names in second column also in set
563                                 if (names.count(firstCol) != 0) {
564                                 
565                                         wroteSomething = true;
566                                         
567                                         out << firstCol << '\t';
568                                         
569                                         //you know you have at least one valid second since first column is valid
570                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
571                                         out << validSecond[validSecond.size()-1] << endl;
572                                         
573                                 
574                                 //make first name in set you come to first column and then add the remaining names to second column
575                                 }else {
576                                         //you want part of this row
577                                         if (validSecond.size() != 0) {
578                                         
579                                                 wroteSomething = true;
580                                                 
581                                                 out << validSecond[0] << '\t';
582                                         
583                                                 //you know you have at least one valid second since first column is valid
584                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
585                                                 out << validSecond[validSecond.size()-1] << endl;
586                                         }
587                                 }
588                         }
589                         m->gobble(in);
590                 }
591                 in.close();
592                 out.close();
593                 
594                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
595                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
596                 
597                 return 0;
598                 
599         }
600         catch(exception& e) {
601                 m->errorOut(e, "GetSeqsCommand", "readName");
602                 exit(1);
603         }
604 }
605
606 //**********************************************************************************************************************
607 int GetSeqsCommand::readGroup(){
608         try {
609                 string thisOutputDir = outputDir;
610                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
611                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
612                 ofstream out;
613                 m->openOutputFile(outputFileName, out);
614                 
615
616                 ifstream in;
617                 m->openInputFile(groupfile, in);
618                 string name, group;
619                 
620                 bool wroteSomething = false;
621                 
622                 while(!in.eof()){
623
624                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
625
626
627                         in >> name;                             //read from first column
628                         in >> group;                    //read from second column
629                         
630                         //if this name is in the accnos file
631                         if (names.count(name) != 0) {
632                                 wroteSomething = true;
633                                 
634                                 out << name << '\t' << group << endl;
635                         }
636                                         
637                         m->gobble(in);
638                 }
639                 in.close();
640                 out.close();
641                 
642                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
643                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
644                 
645                 return 0;
646
647         }
648         catch(exception& e) {
649                 m->errorOut(e, "GetSeqsCommand", "readGroup");
650                 exit(1);
651         }
652 }
653 //**********************************************************************************************************************
654 int GetSeqsCommand::readTax(){
655         try {
656                 string thisOutputDir = outputDir;
657                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
658                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
659                 ofstream out;
660                 m->openOutputFile(outputFileName, out);
661                 
662                 ifstream in;
663                 m->openInputFile(taxfile, in);
664                 string name, tax;
665                 
666                 bool wroteSomething = false;
667                 
668                 while(!in.eof()){
669
670                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
671
672                         in >> name;                             //read from first column
673                         in >> tax;                      //read from second column
674                         
675                         //if this name is in the accnos file
676                         if (names.count(name) != 0) {
677                                 wroteSomething = true;
678                                 
679                                 out << name << '\t' << tax << endl;
680                         }
681                                         
682                         m->gobble(in);
683                 }
684                 in.close();
685                 out.close();
686                 
687                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
688                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
689                         
690                 return 0;
691
692         }
693         catch(exception& e) {
694                 m->errorOut(e, "GetSeqsCommand", "readTax");
695                 exit(1);
696         }
697 }
698 //**********************************************************************************************************************
699 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
700 int GetSeqsCommand::readAlign(){
701         try {
702                 string thisOutputDir = outputDir;
703                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
704                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
705                 ofstream out;
706                 m->openOutputFile(outputFileName, out);
707                 
708
709                 ifstream in;
710                 m->openInputFile(alignfile, in);
711                 string name, junk;
712                 
713                 bool wroteSomething = false;
714                 
715                 //read column headers
716                 for (int i = 0; i < 16; i++) {  
717                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
718                         else                    {       break;                  }
719                 }
720                 out << endl;
721                 
722                 while(!in.eof()){
723                 
724                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
725
726
727                         in >> name;                             //read from first column
728                         
729                         //if this name is in the accnos file
730                         if (names.count(name) != 0) {
731                                 wroteSomething = true;
732                                 
733                                 out << name << '\t';
734                                 
735                                 //read rest
736                                 for (int i = 0; i < 15; i++) {  
737                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
738                                         else                    {       break;                  }
739                                 }
740                                 out << endl;
741                                 
742                         }else {//still read just don't do anything with it
743                                 //read rest
744                                 for (int i = 0; i < 15; i++) {  
745                                         if (!in.eof())  {       in >> junk;             }
746                                         else                    {       break;                  }
747                                 }
748                         }
749                         
750                         m->gobble(in);
751                 }
752                 in.close();
753                 out.close();
754                 
755                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
756                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
757                 
758                 return 0;
759                 
760         }
761         catch(exception& e) {
762                 m->errorOut(e, "GetSeqsCommand", "readAlign");
763                 exit(1);
764         }
765 }
766 //**********************************************************************************************************************
767
768 int GetSeqsCommand::readAccnos(){
769         try {
770                 
771                 ifstream in;
772                 m->openInputFile(accnosfile, in);
773                 string name;
774                 
775                 while(!in.eof()){
776                         in >> name;
777                                                 
778                         names.insert(name);
779                         
780                         m->gobble(in);
781                 }
782                 in.close();     
783                 
784                 return 0;
785
786         }
787         catch(exception& e) {
788                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
789                 exit(1);
790         }
791 }
792 //**********************************************************************************************************************
793
794 int GetSeqsCommand::compareAccnos(){
795         try {
796                 
797                 string thisOutputDir = outputDir;
798                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
799                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + "accnos.report";
800                 ofstream out;
801                 m->openOutputFile(outputFileName, out);
802                 
803                 ifstream in;
804                 m->openInputFile(accnosfile2, in);
805                 string name;
806                 
807                 set<string> namesAccnos2;
808                 set<string> namesDups;
809                 set<string> namesAccnos = names;
810                 
811                 map<string, int> nameCount;
812                 
813                 if (namefile != "") {
814                         ifstream inName;
815                         m->openInputFile(namefile, inName);
816                         
817                         
818                         while(!inName.eof()){
819                                 
820                                 if (m->control_pressed) { inName.close(); return 0; }
821                                 
822                                 string thisname, repnames;
823                                 
824                                 inName >> thisname;             m->gobble(inName);              //read from first column
825                                 inName >> repnames;                     //read from second column
826                                 
827                                 int num = m->getNumNames(repnames);
828                                 nameCount[thisname] = num;
829                                 
830                                 m->gobble(inName);
831                         }
832                         inName.close(); 
833                 }
834                 
835                 while(!in.eof()){
836                         in >> name;
837                         
838                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
839                                 int pos = name.find_last_of('_');
840                                 string tempName = name;
841                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
842                                 if (namesAccnos.count(tempName) == 0){
843                                         namesAccnos2.insert(name);
844                                 }else { //you are in both so erase
845                                         namesAccnos.erase(name);
846                                         namesDups.insert(name);
847                                 }
848                         }else { //you are in both so erase
849                                 namesAccnos.erase(name);
850                                 namesDups.insert(name);
851                         }
852                         
853                         m->gobble(in);
854                 }
855                 in.close();     
856                 
857                 out << "Names in both files : " + toString(namesDups.size()) << endl;
858                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
859                 
860                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
861                         out << (*it);
862                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
863                         out << endl;
864                 }
865                 
866                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
867                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
868                 
869                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
870                         out << (*it);
871                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
872                         out << endl;
873                 }
874                 
875                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
876                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
877                 
878                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
879                         out << (*it);
880                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
881                         out << endl;
882                 }
883
884                 out.close(); 
885                 
886                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
887                 
888                 return 0;
889                 
890         }
891         catch(exception& e) {
892                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
893                 exit(1);
894         }
895 }
896
897
898 //**********************************************************************************************************************
899