]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
added citation function to commands
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetSeqsCommand::setParameters(){ 
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
24                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "GetSeqsCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string GetSeqsCommand::getHelpString(){ 
40         try {
41                 string helpString = "";
42                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
43                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
44                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
45                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
46                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
47                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57 //**********************************************************************************************************************
58 GetSeqsCommand::GetSeqsCommand(){       
59         try {
60                 abort = true; calledHelp = true;
61                 setParameters();
62                 vector<string> tempOutNames;
63                 outputTypes["fasta"] = tempOutNames;
64                 outputTypes["taxonomy"] = tempOutNames;
65                 outputTypes["name"] = tempOutNames;
66                 outputTypes["group"] = tempOutNames;
67                 outputTypes["alignreport"] = tempOutNames;
68                 outputTypes["list"] = tempOutNames;
69                 outputTypes["qfile"] = tempOutNames;
70                 outputTypes["accnosreport"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 GetSeqsCommand::GetSeqsCommand(string option)  {
79         try {
80                 abort = false; calledHelp = false;   
81                                 
82                 //allow user to run help
83                 if(option == "help") { help(); abort = true; calledHelp = true; }
84                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
85                 
86                 else {
87                         vector<string> myArray = setParameters();
88                         
89                         OptionParser parser(option);
90                         map<string,string> parameters = parser.getParameters();
91                         
92                         ValidParameters validParameter;
93                         map<string,string>::iterator it;
94                         
95                         //check to make sure all parameters are valid for command
96                         for (it = parameters.begin(); it != parameters.end(); it++) { 
97                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
98                         }
99                         
100                         //initialize outputTypes
101                         vector<string> tempOutNames;
102                         outputTypes["fasta"] = tempOutNames;
103                         outputTypes["taxonomy"] = tempOutNames;
104                         outputTypes["name"] = tempOutNames;
105                         outputTypes["group"] = tempOutNames;
106                         outputTypes["alignreport"] = tempOutNames;
107                         outputTypes["list"] = tempOutNames;
108                         outputTypes["qfile"] = tempOutNames;
109                         outputTypes["accnosreport"] = tempOutNames;
110                         
111                         //if the user changes the output directory command factory will send this info to us in the output parameter 
112                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
113                         
114                         //if the user changes the input directory command factory will send this info to us in the output parameter 
115                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
116                         if (inputDir == "not found"){   inputDir = "";          }
117                         else {
118                                 string path;
119                                 it = parameters.find("alignreport");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
125                                 }
126                                 
127                                 it = parameters.find("fasta");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
133                                 }
134                                 
135                                 it = parameters.find("accnos");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
141                                 }
142                                 
143                                 it = parameters.find("accnos2");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
149                                 }
150                                 
151                                 it = parameters.find("list");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
157                                 }
158                                 
159                                 it = parameters.find("name");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
165                                 }
166                                 
167                                 it = parameters.find("group");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
173                                 }
174                                 
175                                 it = parameters.find("taxonomy");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
181                                 }
182                                 
183                                 it = parameters.find("qfile");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
189                                 }
190                         }
191
192                         
193                         //check for required parameters
194                         accnosfile = validParameter.validFile(parameters, "accnos", true);
195                         if (accnosfile == "not open") { abort = true; }
196                         else if (accnosfile == "not found") {  
197                                 accnosfile = m->getAccnosFile(); 
198                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
199                                 else { 
200                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
201                                         abort = true;
202                                 } 
203                         }       
204                         
205                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
206                         
207                         fastafile = validParameter.validFile(parameters, "fasta", true);
208                         if (fastafile == "not open") { abort = true; }
209                         else if (fastafile == "not found") {  fastafile = "";  }        
210                         
211                         namefile = validParameter.validFile(parameters, "name", true);
212                         if (namefile == "not open") { abort = true; }
213                         else if (namefile == "not found") {  namefile = "";  }  
214                         
215                         groupfile = validParameter.validFile(parameters, "group", true);
216                         if (groupfile == "not open") { abort = true; }
217                         else if (groupfile == "not found") {  groupfile = "";  }        
218                         
219                         alignfile = validParameter.validFile(parameters, "alignreport", true);
220                         if (alignfile == "not open") { abort = true; }
221                         else if (alignfile == "not found") {  alignfile = "";  }
222                         
223                         listfile = validParameter.validFile(parameters, "list", true);
224                         if (listfile == "not open") { abort = true; }
225                         else if (listfile == "not found") {  listfile = "";  }
226                         
227                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
228                         if (taxfile == "not open") { abort = true; }
229                         else if (taxfile == "not found") {  taxfile = "";  }
230                         
231                         qualfile = validParameter.validFile(parameters, "qfile", true);
232                         if (qualfile == "not open") { abort = true; }
233                         else if (qualfile == "not found") {  qualfile = "";  }
234                         
235                         string usedDups = "true";
236                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
237                         dups = m->isTrue(temp);
238                         
239                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
240                 }
241
242         }
243         catch(exception& e) {
244                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
245                 exit(1);
246         }
247 }
248 //**********************************************************************************************************************
249
250 int GetSeqsCommand::execute(){
251         try {
252                 
253                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
254                 
255                 //get names you want to keep
256                 readAccnos();
257                 
258                 if (m->control_pressed) { return 0; }
259                 
260                 //read through the correct file and output lines you want to keep
261                 if (namefile != "")                     {               readName();                     }
262                 if (fastafile != "")            {               readFasta();            }
263                 if (groupfile != "")            {               readGroup();            }
264                 if (alignfile != "")            {               readAlign();            }
265                 if (listfile != "")                     {               readList();                     }
266                 if (taxfile != "")                      {               readTax();                      }
267                 if (qualfile != "")                     {               readQual();                     }
268                 if (accnosfile2 != "")          {               compareAccnos();        }
269                 
270                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } return 0; }
271                 
272                 m->mothurOut("Selected " + toString(names.size()) + " sequences."); m->mothurOutEndLine();
273                 
274                 if (outputNames.size() != 0) {
275                         m->mothurOutEndLine();
276                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
277                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
278                         m->mothurOutEndLine();
279                         
280                         //set fasta file as new current fastafile
281                         string current = "";
282                         itTypes = outputTypes.find("fasta");
283                         if (itTypes != outputTypes.end()) {
284                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
285                         }
286                         
287                         itTypes = outputTypes.find("name");
288                         if (itTypes != outputTypes.end()) {
289                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
290                         }
291                         
292                         itTypes = outputTypes.find("group");
293                         if (itTypes != outputTypes.end()) {
294                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
295                         }
296                         
297                         itTypes = outputTypes.find("list");
298                         if (itTypes != outputTypes.end()) {
299                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
300                         }
301                         
302                         itTypes = outputTypes.find("taxonomy");
303                         if (itTypes != outputTypes.end()) {
304                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
305                         }
306                         
307                         itTypes = outputTypes.find("qfile");
308                         if (itTypes != outputTypes.end()) {
309                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
310                         }
311                         
312                 }
313                 
314                 return 0;               
315         }
316
317         catch(exception& e) {
318                 m->errorOut(e, "GetSeqsCommand", "execute");
319                 exit(1);
320         }
321 }
322
323 //**********************************************************************************************************************
324 int GetSeqsCommand::readFasta(){
325         try {
326                 string thisOutputDir = outputDir;
327                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
328                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" +  m->getExtension(fastafile);
329                 ofstream out;
330                 m->openOutputFile(outputFileName, out);
331                 
332                 
333                 ifstream in;
334                 m->openInputFile(fastafile, in);
335                 string name;
336                 
337                 bool wroteSomething = false;
338                 
339                 while(!in.eof()){
340                 
341                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
342                         
343                         Sequence currSeq(in);
344                         name = currSeq.getName();
345                         
346                         if (name != "") {
347                                 //if this name is in the accnos file
348                                 if (names.count(name) != 0) {
349                                         wroteSomething = true;
350                                         
351                                         currSeq.printSequence(out);
352                                 }
353                         }
354                         m->gobble(in);
355                 }
356                 in.close();     
357                 out.close();
358                 
359                 
360                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
361                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
362                 
363                 return 0;
364
365         }
366         catch(exception& e) {
367                 m->errorOut(e, "GetSeqsCommand", "readFasta");
368                 exit(1);
369         }
370 }
371 //**********************************************************************************************************************
372 int GetSeqsCommand::readQual(){
373         try {
374                 string thisOutputDir = outputDir;
375                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
376                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
377                 ofstream out;
378                 m->openOutputFile(outputFileName, out);
379                 
380                 
381                 ifstream in;
382                 m->openInputFile(qualfile, in);
383                 string name;
384                 
385                 bool wroteSomething = false;
386                 
387                 
388                 while(!in.eof()){       
389                         string saveName = "";
390                         string name = "";
391                         string scores = "";
392                         
393                         in >> name; 
394                                 
395                         if (name.length() != 0) { 
396                                 saveName = name.substr(1);
397                                 while (!in.eof())       {       
398                                         char c = in.get(); 
399                                         if (c == 10 || c == 13){        break;  }
400                                         else { name += c; }     
401                                 } 
402                                 m->gobble(in);
403                         }
404                         
405                         while(in){
406                                 char letter= in.get();
407                                 if(letter == '>'){      in.putback(letter);     break;  }
408                                 else{ scores += letter; }
409                         }
410                         
411                         m->gobble(in);
412                         
413                         if (names.count(saveName) != 0) {
414                                 wroteSomething = true;
415                                                 
416                                 out << name << endl << scores;
417                         }
418                         
419                         m->gobble(in);
420                 }
421                 in.close();
422                 out.close();
423                 
424                 
425                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
426                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
427                 
428                 return 0;
429                 
430         }
431         catch(exception& e) {
432                 m->errorOut(e, "GetSeqsCommand", "readQual");
433                 exit(1);
434         }
435 }
436 //**********************************************************************************************************************
437 int GetSeqsCommand::readList(){
438         try {
439                 string thisOutputDir = outputDir;
440                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
441                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
442                 ofstream out;
443                 m->openOutputFile(outputFileName, out);
444                 
445                 ifstream in;
446                 m->openInputFile(listfile, in);
447                 
448                 bool wroteSomething = false;
449                 
450                 while(!in.eof()){
451                         
452                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
453
454                         //read in list vector
455                         ListVector list(in);
456                         
457                         //make a new list vector
458                         ListVector newList;
459                         newList.setLabel(list.getLabel());
460                         
461                         //for each bin
462                         for (int i = 0; i < list.getNumBins(); i++) {
463                         
464                                 //parse out names that are in accnos file
465                                 string binnames = list.get(i);
466                                 
467                                 string newNames = "";
468                                 while (binnames.find_first_of(',') != -1) { 
469                                         string name = binnames.substr(0,binnames.find_first_of(','));
470                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
471                                         
472                                         //if that name is in the .accnos file, add it
473                                         if (names.count(name) != 0) {  newNames += name + ",";  }
474                                 }
475                         
476                                 //get last name
477                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  }
478
479                                 //if there are names in this bin add to new list
480                                 if (newNames != "") { 
481                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
482                                         newList.push_back(newNames);    
483                                 }
484                         }
485                                 
486                         //print new listvector
487                         if (newList.getNumBins() != 0) {
488                                 wroteSomething = true;
489                                 newList.print(out);
490                         }
491                         
492                         m->gobble(in);
493                 }
494                 in.close();     
495                 out.close();
496                 
497                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
498                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
499                 
500                 return 0;
501
502         }
503         catch(exception& e) {
504                 m->errorOut(e, "GetSeqsCommand", "readList");
505                 exit(1);
506         }
507 }
508 //**********************************************************************************************************************
509 int GetSeqsCommand::readName(){
510         try {
511                 string thisOutputDir = outputDir;
512                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
513                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" +  m->getExtension(namefile);
514                 ofstream out;
515                 m->openOutputFile(outputFileName, out);
516                 
517
518                 ifstream in;
519                 m->openInputFile(namefile, in);
520                 string name, firstCol, secondCol;
521                 
522                 bool wroteSomething = false;
523                 
524                 
525                 while(!in.eof()){
526                 
527                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
528
529                         in >> firstCol;                         
530                         in >> secondCol;
531                         
532                         string hold = "";
533                         if (dups) { hold = secondCol; }
534                         
535                         vector<string> parsedNames;
536                         m->splitAtComma(secondCol, parsedNames);
537                         
538                         vector<string> validSecond;
539                         for (int i = 0; i < parsedNames.size(); i++) {
540                                 if (names.count(parsedNames[i]) != 0) {
541                                         validSecond.push_back(parsedNames[i]);
542                                 }
543                         }
544
545                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
546                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
547                                 out << firstCol << '\t' << hold << endl;
548                                 wroteSomething = true;
549                         }else {
550                                 //if the name in the first column is in the set then print it and any other names in second column also in set
551                                 if (names.count(firstCol) != 0) {
552                                 
553                                         wroteSomething = true;
554                                         
555                                         out << firstCol << '\t';
556                                         
557                                         //you know you have at least one valid second since first column is valid
558                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
559                                         out << validSecond[validSecond.size()-1] << endl;
560                                         
561                                 
562                                 //make first name in set you come to first column and then add the remaining names to second column
563                                 }else {
564                                         //you want part of this row
565                                         if (validSecond.size() != 0) {
566                                         
567                                                 wroteSomething = true;
568                                                 
569                                                 out << validSecond[0] << '\t';
570                                         
571                                                 //you know you have at least one valid second since first column is valid
572                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
573                                                 out << validSecond[validSecond.size()-1] << endl;
574                                         }
575                                 }
576                         }
577                         m->gobble(in);
578                 }
579                 in.close();
580                 out.close();
581                 
582                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
583                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
584                 
585                 return 0;
586                 
587         }
588         catch(exception& e) {
589                 m->errorOut(e, "GetSeqsCommand", "readName");
590                 exit(1);
591         }
592 }
593
594 //**********************************************************************************************************************
595 int GetSeqsCommand::readGroup(){
596         try {
597                 string thisOutputDir = outputDir;
598                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
599                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
600                 ofstream out;
601                 m->openOutputFile(outputFileName, out);
602                 
603
604                 ifstream in;
605                 m->openInputFile(groupfile, in);
606                 string name, group;
607                 
608                 bool wroteSomething = false;
609                 
610                 while(!in.eof()){
611
612                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
613
614
615                         in >> name;                             //read from first column
616                         in >> group;                    //read from second column
617                         
618                         //if this name is in the accnos file
619                         if (names.count(name) != 0) {
620                                 wroteSomething = true;
621                                 
622                                 out << name << '\t' << group << endl;
623                         }
624                                         
625                         m->gobble(in);
626                 }
627                 in.close();
628                 out.close();
629                 
630                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
631                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
632                 
633                 return 0;
634
635         }
636         catch(exception& e) {
637                 m->errorOut(e, "GetSeqsCommand", "readGroup");
638                 exit(1);
639         }
640 }
641 //**********************************************************************************************************************
642 int GetSeqsCommand::readTax(){
643         try {
644                 string thisOutputDir = outputDir;
645                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
646                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
647                 ofstream out;
648                 m->openOutputFile(outputFileName, out);
649                 
650                 ifstream in;
651                 m->openInputFile(taxfile, in);
652                 string name, tax;
653                 
654                 bool wroteSomething = false;
655                 
656                 while(!in.eof()){
657
658                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
659
660                         in >> name;                             //read from first column
661                         in >> tax;                      //read from second column
662                         
663                         //if this name is in the accnos file
664                         if (names.count(name) != 0) {
665                                 wroteSomething = true;
666                                 
667                                 out << name << '\t' << tax << endl;
668                         }
669                                         
670                         m->gobble(in);
671                 }
672                 in.close();
673                 out.close();
674                 
675                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
676                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
677                         
678                 return 0;
679
680         }
681         catch(exception& e) {
682                 m->errorOut(e, "GetSeqsCommand", "readTax");
683                 exit(1);
684         }
685 }
686 //**********************************************************************************************************************
687 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
688 int GetSeqsCommand::readAlign(){
689         try {
690                 string thisOutputDir = outputDir;
691                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
692                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
693                 ofstream out;
694                 m->openOutputFile(outputFileName, out);
695                 
696
697                 ifstream in;
698                 m->openInputFile(alignfile, in);
699                 string name, junk;
700                 
701                 bool wroteSomething = false;
702                 
703                 //read column headers
704                 for (int i = 0; i < 16; i++) {  
705                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
706                         else                    {       break;                  }
707                 }
708                 out << endl;
709                 
710                 while(!in.eof()){
711                 
712                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
713
714
715                         in >> name;                             //read from first column
716                         
717                         //if this name is in the accnos file
718                         if (names.count(name) != 0) {
719                                 wroteSomething = true;
720                                 
721                                 out << name << '\t';
722                                 
723                                 //read rest
724                                 for (int i = 0; i < 15; i++) {  
725                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
726                                         else                    {       break;                  }
727                                 }
728                                 out << endl;
729                                 
730                         }else {//still read just don't do anything with it
731                                 //read rest
732                                 for (int i = 0; i < 15; i++) {  
733                                         if (!in.eof())  {       in >> junk;             }
734                                         else                    {       break;                  }
735                                 }
736                         }
737                         
738                         m->gobble(in);
739                 }
740                 in.close();
741                 out.close();
742                 
743                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
744                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
745                 
746                 return 0;
747                 
748         }
749         catch(exception& e) {
750                 m->errorOut(e, "GetSeqsCommand", "readAlign");
751                 exit(1);
752         }
753 }
754 //**********************************************************************************************************************
755
756 int GetSeqsCommand::readAccnos(){
757         try {
758                 
759                 ifstream in;
760                 m->openInputFile(accnosfile, in);
761                 string name;
762                 
763                 while(!in.eof()){
764                         in >> name;
765                                                 
766                         names.insert(name);
767                         
768                         m->gobble(in);
769                 }
770                 in.close();     
771                 
772                 return 0;
773
774         }
775         catch(exception& e) {
776                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
777                 exit(1);
778         }
779 }
780 //**********************************************************************************************************************
781
782 int GetSeqsCommand::compareAccnos(){
783         try {
784                 
785                 string thisOutputDir = outputDir;
786                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
787                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + "accnos.report";
788                 ofstream out;
789                 m->openOutputFile(outputFileName, out);
790                 
791                 ifstream in;
792                 m->openInputFile(accnosfile2, in);
793                 string name;
794                 
795                 set<string> namesAccnos2;
796                 set<string> namesDups;
797                 set<string> namesAccnos = names;
798                 
799                 map<string, int> nameCount;
800                 
801                 if (namefile != "") {
802                         ifstream inName;
803                         m->openInputFile(namefile, inName);
804                         
805                         
806                         while(!inName.eof()){
807                                 
808                                 if (m->control_pressed) { inName.close(); return 0; }
809                                 
810                                 string thisname, repnames;
811                                 
812                                 inName >> thisname;             m->gobble(inName);              //read from first column
813                                 inName >> repnames;                     //read from second column
814                                 
815                                 int num = m->getNumNames(repnames);
816                                 nameCount[thisname] = num;
817                                 
818                                 m->gobble(inName);
819                         }
820                         inName.close(); 
821                 }
822                 
823                 while(!in.eof()){
824                         in >> name;
825                         
826                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
827                                 namesAccnos2.insert(name);
828                         }else { //you are in both so erase
829                                 namesAccnos.erase(name);
830                                 namesDups.insert(name);
831                         }
832                         
833                         m->gobble(in);
834                 }
835                 in.close();     
836                 
837                 out << "Names in both files : " + toString(namesDups.size()) << endl;
838                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
839                 
840                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
841                         out << (*it);
842                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
843                         out << endl;
844                 }
845                 
846                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
847                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
848                 
849                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
850                         out << (*it);
851                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
852                         out << endl;
853                 }
854                 
855                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
856                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
857                 
858                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
859                         out << (*it);
860                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
861                         out << endl;
862                 }
863
864                 out.close(); 
865                 
866                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
867                 
868                 return 0;
869                 
870         }
871         catch(exception& e) {
872                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
873                 exit(1);
874         }
875 }
876
877
878 //**********************************************************************************************************************
879