]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
a4697ca3fded8c34ec5320630a588e243601373d
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetSeqsCommand::setParameters(){ 
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
24                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos2);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "GetSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string GetSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
44                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
45                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
46                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
48                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
49                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57
58 //**********************************************************************************************************************
59 GetSeqsCommand::GetSeqsCommand(){       
60         try {
61                 abort = true; calledHelp = true;
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["fasta"] = tempOutNames;
65                 outputTypes["taxonomy"] = tempOutNames;
66                 outputTypes["name"] = tempOutNames;
67                 outputTypes["group"] = tempOutNames;
68                 outputTypes["alignreport"] = tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["qfile"] = tempOutNames;
71                 outputTypes["accnosreport"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 GetSeqsCommand::GetSeqsCommand(string option)  {
80         try {
81                 abort = false; calledHelp = false;   
82                                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string,string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                         
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["fasta"] = tempOutNames;
104                         outputTypes["taxonomy"] = tempOutNames;
105                         outputTypes["name"] = tempOutNames;
106                         outputTypes["group"] = tempOutNames;
107                         outputTypes["alignreport"] = tempOutNames;
108                         outputTypes["list"] = tempOutNames;
109                         outputTypes["qfile"] = tempOutNames;
110                         outputTypes["accnosreport"] = tempOutNames;
111                         
112                         //if the user changes the output directory command factory will send this info to us in the output parameter 
113                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("alignreport");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
126                                 }
127                                 
128                                 it = parameters.find("fasta");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("accnos");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
142                                 }
143                                 
144                                 it = parameters.find("accnos2");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
150                                 }
151                                 
152                                 it = parameters.find("list");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167                                 
168                                 it = parameters.find("group");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
174                                 }
175                                 
176                                 it = parameters.find("taxonomy");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
182                                 }
183                                 
184                                 it = parameters.find("qfile");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
190                                 }
191                         }
192
193                         
194                         //check for required parameters
195                         accnosfile = validParameter.validFile(parameters, "accnos", true);
196                         if (accnosfile == "not open") { abort = true; }
197                         else if (accnosfile == "not found") {  
198                                 accnosfile = m->getAccnosFile(); 
199                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
200                                 else { 
201                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
202                                         abort = true;
203                                 } 
204                         }       
205                         
206                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
207                         
208                         fastafile = validParameter.validFile(parameters, "fasta", true);
209                         if (fastafile == "not open") { abort = true; }
210                         else if (fastafile == "not found") {  fastafile = "";  }        
211                         
212                         namefile = validParameter.validFile(parameters, "name", true);
213                         if (namefile == "not open") { abort = true; }
214                         else if (namefile == "not found") {  namefile = "";  }  
215                         
216                         groupfile = validParameter.validFile(parameters, "group", true);
217                         if (groupfile == "not open") { abort = true; }
218                         else if (groupfile == "not found") {  groupfile = "";  }        
219                         
220                         alignfile = validParameter.validFile(parameters, "alignreport", true);
221                         if (alignfile == "not open") { abort = true; }
222                         else if (alignfile == "not found") {  alignfile = "";  }
223                         
224                         listfile = validParameter.validFile(parameters, "list", true);
225                         if (listfile == "not open") { abort = true; }
226                         else if (listfile == "not found") {  listfile = "";  }
227                         
228                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
229                         if (taxfile == "not open") { abort = true; }
230                         else if (taxfile == "not found") {  taxfile = "";  }
231                         
232                         qualfile = validParameter.validFile(parameters, "qfile", true);
233                         if (qualfile == "not open") { abort = true; }
234                         else if (qualfile == "not found") {  qualfile = "";  }
235                         
236                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
237                         if (accnosfile2 == "not open") { abort = true; }
238                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
239                         
240                         
241                         string usedDups = "true";
242                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
243                         dups = m->isTrue(temp);
244                         
245                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
246                 }
247
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
251                 exit(1);
252         }
253 }
254 //**********************************************************************************************************************
255
256 int GetSeqsCommand::execute(){
257         try {
258                 
259                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
260                 
261                 //get names you want to keep
262                 readAccnos();
263                 
264                 if (m->control_pressed) { return 0; }
265                 
266                 //read through the correct file and output lines you want to keep
267                 if (namefile != "")                     {               readName();                     }
268                 if (fastafile != "")            {               readFasta();            }
269                 if (groupfile != "")            {               readGroup();            }
270                 if (alignfile != "")            {               readAlign();            }
271                 if (listfile != "")                     {               readList();                     }
272                 if (taxfile != "")                      {               readTax();                      }
273                 if (qualfile != "")                     {               readQual();                     }
274                 if (accnosfile2 != "")          {               compareAccnos();        }
275                 
276                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str());  } return 0; }
277                 
278                 m->mothurOut("Selected " + toString(names.size()) + " sequences."); m->mothurOutEndLine();
279                 
280                 if (outputNames.size() != 0) {
281                         m->mothurOutEndLine();
282                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
283                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
284                         m->mothurOutEndLine();
285                         
286                         //set fasta file as new current fastafile
287                         string current = "";
288                         itTypes = outputTypes.find("fasta");
289                         if (itTypes != outputTypes.end()) {
290                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
291                         }
292                         
293                         itTypes = outputTypes.find("name");
294                         if (itTypes != outputTypes.end()) {
295                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
296                         }
297                         
298                         itTypes = outputTypes.find("group");
299                         if (itTypes != outputTypes.end()) {
300                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
301                         }
302                         
303                         itTypes = outputTypes.find("list");
304                         if (itTypes != outputTypes.end()) {
305                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
306                         }
307                         
308                         itTypes = outputTypes.find("taxonomy");
309                         if (itTypes != outputTypes.end()) {
310                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
311                         }
312                         
313                         itTypes = outputTypes.find("qfile");
314                         if (itTypes != outputTypes.end()) {
315                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
316                         }
317                         
318                 }
319                 
320                 return 0;               
321         }
322
323         catch(exception& e) {
324                 m->errorOut(e, "GetSeqsCommand", "execute");
325                 exit(1);
326         }
327 }
328
329 //**********************************************************************************************************************
330 int GetSeqsCommand::readFasta(){
331         try {
332                 string thisOutputDir = outputDir;
333                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
334                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" +  m->getExtension(fastafile);
335                 ofstream out;
336                 m->openOutputFile(outputFileName, out);
337                 
338                 
339                 ifstream in;
340                 m->openInputFile(fastafile, in);
341                 string name;
342                 
343                 bool wroteSomething = false;
344                 
345                 while(!in.eof()){
346                 
347                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
348                         
349                         Sequence currSeq(in);
350                         name = currSeq.getName();
351                         
352                         if (name != "") {
353                                 //if this name is in the accnos file
354                                 if (names.count(name) != 0) {
355                                         wroteSomething = true;
356                                         
357                                         currSeq.printSequence(out);
358                                 }
359                         }
360                         m->gobble(in);
361                 }
362                 in.close();     
363                 out.close();
364                 
365                 
366                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
367                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
368                 
369                 return 0;
370
371         }
372         catch(exception& e) {
373                 m->errorOut(e, "GetSeqsCommand", "readFasta");
374                 exit(1);
375         }
376 }
377 //**********************************************************************************************************************
378 int GetSeqsCommand::readQual(){
379         try {
380                 string thisOutputDir = outputDir;
381                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
382                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
383                 ofstream out;
384                 m->openOutputFile(outputFileName, out);
385                 
386                 
387                 ifstream in;
388                 m->openInputFile(qualfile, in);
389                 string name;
390                 
391                 bool wroteSomething = false;
392                 
393                 
394                 while(!in.eof()){       
395                         string saveName = "";
396                         string name = "";
397                         string scores = "";
398                         
399                         in >> name; 
400                                 
401                         if (name.length() != 0) { 
402                                 saveName = name.substr(1);
403                                 while (!in.eof())       {       
404                                         char c = in.get(); 
405                                         if (c == 10 || c == 13){        break;  }
406                                         else { name += c; }     
407                                 } 
408                                 m->gobble(in);
409                         }
410                         
411                         while(in){
412                                 char letter= in.get();
413                                 if(letter == '>'){      in.putback(letter);     break;  }
414                                 else{ scores += letter; }
415                         }
416                         
417                         m->gobble(in);
418                         
419                         if (names.count(saveName) != 0) {
420                                 wroteSomething = true;
421                                                 
422                                 out << name << endl << scores;
423                         }
424                         
425                         m->gobble(in);
426                 }
427                 in.close();
428                 out.close();
429                 
430                 
431                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
432                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
433                 
434                 return 0;
435                 
436         }
437         catch(exception& e) {
438                 m->errorOut(e, "GetSeqsCommand", "readQual");
439                 exit(1);
440         }
441 }
442 //**********************************************************************************************************************
443 int GetSeqsCommand::readList(){
444         try {
445                 string thisOutputDir = outputDir;
446                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
447                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
448                 ofstream out;
449                 m->openOutputFile(outputFileName, out);
450                 
451                 ifstream in;
452                 m->openInputFile(listfile, in);
453                 
454                 bool wroteSomething = false;
455                 
456                 while(!in.eof()){
457                         
458                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
459
460                         //read in list vector
461                         ListVector list(in);
462                         
463                         //make a new list vector
464                         ListVector newList;
465                         newList.setLabel(list.getLabel());
466                         
467                         //for each bin
468                         for (int i = 0; i < list.getNumBins(); i++) {
469                         
470                                 //parse out names that are in accnos file
471                                 string binnames = list.get(i);
472                                 
473                                 string newNames = "";
474                                 while (binnames.find_first_of(',') != -1) { 
475                                         string name = binnames.substr(0,binnames.find_first_of(','));
476                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
477                                         
478                                         //if that name is in the .accnos file, add it
479                                         if (names.count(name) != 0) {  newNames += name + ",";  }
480                                 }
481                         
482                                 //get last name
483                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  }
484
485                                 //if there are names in this bin add to new list
486                                 if (newNames != "") { 
487                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
488                                         newList.push_back(newNames);    
489                                 }
490                         }
491                                 
492                         //print new listvector
493                         if (newList.getNumBins() != 0) {
494                                 wroteSomething = true;
495                                 newList.print(out);
496                         }
497                         
498                         m->gobble(in);
499                 }
500                 in.close();     
501                 out.close();
502                 
503                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
504                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
505                 
506                 return 0;
507
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "GetSeqsCommand", "readList");
511                 exit(1);
512         }
513 }
514 //**********************************************************************************************************************
515 int GetSeqsCommand::readName(){
516         try {
517                 string thisOutputDir = outputDir;
518                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
519                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" +  m->getExtension(namefile);
520                 ofstream out;
521                 m->openOutputFile(outputFileName, out);
522                 
523
524                 ifstream in;
525                 m->openInputFile(namefile, in);
526                 string name, firstCol, secondCol;
527                 
528                 bool wroteSomething = false;
529                 
530                 
531                 while(!in.eof()){
532                 
533                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
534
535                         in >> firstCol;                         
536                         in >> secondCol;
537                         
538                         string hold = "";
539                         if (dups) { hold = secondCol; }
540                         
541                         vector<string> parsedNames;
542                         m->splitAtComma(secondCol, parsedNames);
543                         
544                         vector<string> validSecond;
545                         for (int i = 0; i < parsedNames.size(); i++) {
546                                 if (names.count(parsedNames[i]) != 0) {
547                                         validSecond.push_back(parsedNames[i]);
548                                 }
549                         }
550
551                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
552                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
553                                 out << firstCol << '\t' << hold << endl;
554                                 wroteSomething = true;
555                         }else {
556                                 //if the name in the first column is in the set then print it and any other names in second column also in set
557                                 if (names.count(firstCol) != 0) {
558                                 
559                                         wroteSomething = true;
560                                         
561                                         out << firstCol << '\t';
562                                         
563                                         //you know you have at least one valid second since first column is valid
564                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
565                                         out << validSecond[validSecond.size()-1] << endl;
566                                         
567                                 
568                                 //make first name in set you come to first column and then add the remaining names to second column
569                                 }else {
570                                         //you want part of this row
571                                         if (validSecond.size() != 0) {
572                                         
573                                                 wroteSomething = true;
574                                                 
575                                                 out << validSecond[0] << '\t';
576                                         
577                                                 //you know you have at least one valid second since first column is valid
578                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
579                                                 out << validSecond[validSecond.size()-1] << endl;
580                                         }
581                                 }
582                         }
583                         m->gobble(in);
584                 }
585                 in.close();
586                 out.close();
587                 
588                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
589                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
590                 
591                 return 0;
592                 
593         }
594         catch(exception& e) {
595                 m->errorOut(e, "GetSeqsCommand", "readName");
596                 exit(1);
597         }
598 }
599
600 //**********************************************************************************************************************
601 int GetSeqsCommand::readGroup(){
602         try {
603                 string thisOutputDir = outputDir;
604                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
605                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
606                 ofstream out;
607                 m->openOutputFile(outputFileName, out);
608                 
609
610                 ifstream in;
611                 m->openInputFile(groupfile, in);
612                 string name, group;
613                 
614                 bool wroteSomething = false;
615                 
616                 while(!in.eof()){
617
618                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
619
620
621                         in >> name;                             //read from first column
622                         in >> group;                    //read from second column
623                         
624                         //if this name is in the accnos file
625                         if (names.count(name) != 0) {
626                                 wroteSomething = true;
627                                 
628                                 out << name << '\t' << group << endl;
629                         }
630                                         
631                         m->gobble(in);
632                 }
633                 in.close();
634                 out.close();
635                 
636                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
637                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
638                 
639                 return 0;
640
641         }
642         catch(exception& e) {
643                 m->errorOut(e, "GetSeqsCommand", "readGroup");
644                 exit(1);
645         }
646 }
647 //**********************************************************************************************************************
648 int GetSeqsCommand::readTax(){
649         try {
650                 string thisOutputDir = outputDir;
651                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
652                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
653                 ofstream out;
654                 m->openOutputFile(outputFileName, out);
655                 
656                 ifstream in;
657                 m->openInputFile(taxfile, in);
658                 string name, tax;
659                 
660                 bool wroteSomething = false;
661                 
662                 while(!in.eof()){
663
664                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
665
666                         in >> name;                             //read from first column
667                         in >> tax;                      //read from second column
668                         
669                         //if this name is in the accnos file
670                         if (names.count(name) != 0) {
671                                 wroteSomething = true;
672                                 
673                                 out << name << '\t' << tax << endl;
674                         }
675                                         
676                         m->gobble(in);
677                 }
678                 in.close();
679                 out.close();
680                 
681                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
682                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
683                         
684                 return 0;
685
686         }
687         catch(exception& e) {
688                 m->errorOut(e, "GetSeqsCommand", "readTax");
689                 exit(1);
690         }
691 }
692 //**********************************************************************************************************************
693 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
694 int GetSeqsCommand::readAlign(){
695         try {
696                 string thisOutputDir = outputDir;
697                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
698                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
699                 ofstream out;
700                 m->openOutputFile(outputFileName, out);
701                 
702
703                 ifstream in;
704                 m->openInputFile(alignfile, in);
705                 string name, junk;
706                 
707                 bool wroteSomething = false;
708                 
709                 //read column headers
710                 for (int i = 0; i < 16; i++) {  
711                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
712                         else                    {       break;                  }
713                 }
714                 out << endl;
715                 
716                 while(!in.eof()){
717                 
718                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
719
720
721                         in >> name;                             //read from first column
722                         
723                         //if this name is in the accnos file
724                         if (names.count(name) != 0) {
725                                 wroteSomething = true;
726                                 
727                                 out << name << '\t';
728                                 
729                                 //read rest
730                                 for (int i = 0; i < 15; i++) {  
731                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
732                                         else                    {       break;                  }
733                                 }
734                                 out << endl;
735                                 
736                         }else {//still read just don't do anything with it
737                                 //read rest
738                                 for (int i = 0; i < 15; i++) {  
739                                         if (!in.eof())  {       in >> junk;             }
740                                         else                    {       break;                  }
741                                 }
742                         }
743                         
744                         m->gobble(in);
745                 }
746                 in.close();
747                 out.close();
748                 
749                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
750                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
751                 
752                 return 0;
753                 
754         }
755         catch(exception& e) {
756                 m->errorOut(e, "GetSeqsCommand", "readAlign");
757                 exit(1);
758         }
759 }
760 //**********************************************************************************************************************
761
762 int GetSeqsCommand::readAccnos(){
763         try {
764                 
765                 ifstream in;
766                 m->openInputFile(accnosfile, in);
767                 string name;
768                 
769                 while(!in.eof()){
770                         in >> name;
771                                                 
772                         names.insert(name);
773                         
774                         m->gobble(in);
775                 }
776                 in.close();     
777                 
778                 return 0;
779
780         }
781         catch(exception& e) {
782                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
783                 exit(1);
784         }
785 }
786 //**********************************************************************************************************************
787
788 int GetSeqsCommand::compareAccnos(){
789         try {
790                 
791                 string thisOutputDir = outputDir;
792                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
793                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + "accnos.report";
794                 ofstream out;
795                 m->openOutputFile(outputFileName, out);
796                 
797                 ifstream in;
798                 m->openInputFile(accnosfile2, in);
799                 string name;
800                 
801                 set<string> namesAccnos2;
802                 set<string> namesDups;
803                 set<string> namesAccnos = names;
804                 
805                 map<string, int> nameCount;
806                 
807                 if (namefile != "") {
808                         ifstream inName;
809                         m->openInputFile(namefile, inName);
810                         
811                         
812                         while(!inName.eof()){
813                                 
814                                 if (m->control_pressed) { inName.close(); return 0; }
815                                 
816                                 string thisname, repnames;
817                                 
818                                 inName >> thisname;             m->gobble(inName);              //read from first column
819                                 inName >> repnames;                     //read from second column
820                                 
821                                 int num = m->getNumNames(repnames);
822                                 nameCount[thisname] = num;
823                                 
824                                 m->gobble(inName);
825                         }
826                         inName.close(); 
827                 }
828                 
829                 while(!in.eof()){
830                         in >> name;
831                         
832                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
833                                 int pos = name.find_last_of('_');
834                                 string tempName = name;
835                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
836                                 if (namesAccnos.count(tempName) == 0){
837                                         namesAccnos2.insert(name);
838                                 }else { //you are in both so erase
839                                         namesAccnos.erase(name);
840                                         namesDups.insert(name);
841                                 }
842                         }else { //you are in both so erase
843                                 namesAccnos.erase(name);
844                                 namesDups.insert(name);
845                         }
846                         
847                         m->gobble(in);
848                 }
849                 in.close();     
850                 
851                 out << "Names in both files : " + toString(namesDups.size()) << endl;
852                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
853                 
854                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
855                         out << (*it);
856                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
857                         out << endl;
858                 }
859                 
860                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
861                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
862                 
863                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
864                         out << (*it);
865                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
866                         out << endl;
867                 }
868                 
869                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
870                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
871                 
872                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
873                         out << (*it);
874                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
875                         out << endl;
876                 }
877
878                 out.close(); 
879                 
880                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
881                 
882                 return 0;
883                 
884         }
885         catch(exception& e) {
886                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
887                 exit(1);
888         }
889 }
890
891
892 //**********************************************************************************************************************
893