]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
ffe49b48810d7916e5324efd9480c87e8322ec92
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetSeqsCommand::setParameters(){ 
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
24                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos2);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "GetSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string GetSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
44                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
45                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
46                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
48                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
49                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57
58 //**********************************************************************************************************************
59 GetSeqsCommand::GetSeqsCommand(){       
60         try {
61                 abort = true; calledHelp = true;
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["fasta"] = tempOutNames;
65                 outputTypes["taxonomy"] = tempOutNames;
66                 outputTypes["name"] = tempOutNames;
67                 outputTypes["group"] = tempOutNames;
68                 outputTypes["alignreport"] = tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["qfile"] = tempOutNames;
71                 outputTypes["accnosreport"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 GetSeqsCommand::GetSeqsCommand(string option)  {
80         try {
81                 abort = false; calledHelp = false;   
82                                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string,string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                         
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["fasta"] = tempOutNames;
104                         outputTypes["taxonomy"] = tempOutNames;
105                         outputTypes["name"] = tempOutNames;
106                         outputTypes["group"] = tempOutNames;
107                         outputTypes["alignreport"] = tempOutNames;
108                         outputTypes["list"] = tempOutNames;
109                         outputTypes["qfile"] = tempOutNames;
110                         outputTypes["accnosreport"] = tempOutNames;
111                         
112                         //if the user changes the output directory command factory will send this info to us in the output parameter 
113                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("alignreport");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
126                                 }
127                                 
128                                 it = parameters.find("fasta");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("accnos");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
142                                 }
143                                 
144                                 it = parameters.find("accnos2");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
150                                 }
151                                 
152                                 it = parameters.find("list");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167                                 
168                                 it = parameters.find("group");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
174                                 }
175                                 
176                                 it = parameters.find("taxonomy");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
182                                 }
183                                 
184                                 it = parameters.find("qfile");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
190                                 }
191                         }
192
193                         
194                         //check for required parameters
195                         accnosfile = validParameter.validFile(parameters, "accnos", true);
196                         if (accnosfile == "not open") { abort = true; }
197                         else if (accnosfile == "not found") {  
198                                 accnosfile = m->getAccnosFile(); 
199                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
200                                 else { 
201                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
202                                         abort = true;
203                                 } 
204                         }else { m->setAccnosFile(accnosfile); } 
205                         
206                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
207                         
208                         fastafile = validParameter.validFile(parameters, "fasta", true);
209                         if (fastafile == "not open") { abort = true; }
210                         else if (fastafile == "not found") {  fastafile = "";  }
211                         else { m->setFastaFile(fastafile); }
212                         
213                         namefile = validParameter.validFile(parameters, "name", true);
214                         if (namefile == "not open") { abort = true; }
215                         else if (namefile == "not found") {  namefile = "";  }  
216                         else { m->setNameFile(namefile); }
217                         
218                         groupfile = validParameter.validFile(parameters, "group", true);
219                         if (groupfile == "not open") { abort = true; }
220                         else if (groupfile == "not found") {  groupfile = "";  }        
221                         else { m->setGroupFile(groupfile); }
222                         
223                         alignfile = validParameter.validFile(parameters, "alignreport", true);
224                         if (alignfile == "not open") { abort = true; }
225                         else if (alignfile == "not found") {  alignfile = "";  }
226                         
227                         listfile = validParameter.validFile(parameters, "list", true);
228                         if (listfile == "not open") { abort = true; }
229                         else if (listfile == "not found") {  listfile = "";  }
230                         else { m->setListFile(listfile); }
231                         
232                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
233                         if (taxfile == "not open") { abort = true; }
234                         else if (taxfile == "not found") {  taxfile = "";  }
235                         else { m->setTaxonomyFile(taxfile); }
236                         
237                         qualfile = validParameter.validFile(parameters, "qfile", true);
238                         if (qualfile == "not open") { abort = true; }
239                         else if (qualfile == "not found") {  qualfile = "";  }
240                         else { m->setQualFile(qualfile); }
241                         
242                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
243                         if (accnosfile2 == "not open") { abort = true; }
244                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
245                         
246                         
247                         string usedDups = "true";
248                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
249                         dups = m->isTrue(temp);
250                         
251                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
252                 }
253
254         }
255         catch(exception& e) {
256                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
257                 exit(1);
258         }
259 }
260 //**********************************************************************************************************************
261
262 int GetSeqsCommand::execute(){
263         try {
264                 
265                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
266                 
267                 //get names you want to keep
268                 readAccnos();
269                 
270                 if (m->control_pressed) { return 0; }
271                 
272                 //read through the correct file and output lines you want to keep
273                 if (namefile != "")                     {               readName();                     }
274                 if (fastafile != "")            {               readFasta();            }
275                 if (groupfile != "")            {               readGroup();            }
276                 if (alignfile != "")            {               readAlign();            }
277                 if (listfile != "")                     {               readList();                     }
278                 if (taxfile != "")                      {               readTax();                      }
279                 if (qualfile != "")                     {               readQual();                     }
280                 if (accnosfile2 != "")          {               compareAccnos();        }
281                 
282                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
283                 
284                 
285                 if (outputNames.size() != 0) {
286                         m->mothurOutEndLine();
287                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
288                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
289                         m->mothurOutEndLine();
290                         
291                         //set fasta file as new current fastafile
292                         string current = "";
293                         itTypes = outputTypes.find("fasta");
294                         if (itTypes != outputTypes.end()) {
295                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
296                         }
297                         
298                         itTypes = outputTypes.find("name");
299                         if (itTypes != outputTypes.end()) {
300                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
301                         }
302                         
303                         itTypes = outputTypes.find("group");
304                         if (itTypes != outputTypes.end()) {
305                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
306                         }
307                         
308                         itTypes = outputTypes.find("list");
309                         if (itTypes != outputTypes.end()) {
310                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
311                         }
312                         
313                         itTypes = outputTypes.find("taxonomy");
314                         if (itTypes != outputTypes.end()) {
315                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
316                         }
317                         
318                         itTypes = outputTypes.find("qfile");
319                         if (itTypes != outputTypes.end()) {
320                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
321                         }
322                         
323                 }
324                 
325                 return 0;               
326         }
327
328         catch(exception& e) {
329                 m->errorOut(e, "GetSeqsCommand", "execute");
330                 exit(1);
331         }
332 }
333
334 //**********************************************************************************************************************
335 int GetSeqsCommand::readFasta(){
336         try {
337                 string thisOutputDir = outputDir;
338                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
339                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" +  m->getExtension(fastafile);
340                 ofstream out;
341                 m->openOutputFile(outputFileName, out);
342                 
343                 
344                 ifstream in;
345                 m->openInputFile(fastafile, in);
346                 string name;
347                 
348                 bool wroteSomething = false;
349                 int selectedCount = 0;
350                 
351                 while(!in.eof()){
352                 
353                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
354                         
355                         Sequence currSeq(in);
356                         name = currSeq.getName();
357                         
358                         if (name != "") {
359                                 //if this name is in the accnos file
360                                 if (names.count(name) != 0) {
361                                         wroteSomething = true;
362                                         
363                                         currSeq.printSequence(out);
364                                         selectedCount++;
365                                 }
366                         }
367                         m->gobble(in);
368                 }
369                 in.close();     
370                 out.close();
371                 
372                 
373                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
374                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
375                 
376                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
377                 
378                 return 0;
379
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "GetSeqsCommand", "readFasta");
383                 exit(1);
384         }
385 }
386 //**********************************************************************************************************************
387 int GetSeqsCommand::readQual(){
388         try {
389                 string thisOutputDir = outputDir;
390                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
391                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
392                 ofstream out;
393                 m->openOutputFile(outputFileName, out);
394                 
395                 
396                 ifstream in;
397                 m->openInputFile(qualfile, in);
398                 string name;
399                 
400                 bool wroteSomething = false;
401                 int selectedCount = 0;
402                 
403                 
404                 while(!in.eof()){       
405                         string saveName = "";
406                         string name = "";
407                         string scores = "";
408                         
409                         in >> name; 
410                                 
411                         if (name.length() != 0) { 
412                                 saveName = name.substr(1);
413                                 while (!in.eof())       {       
414                                         char c = in.get(); 
415                                         if (c == 10 || c == 13){        break;  }
416                                         else { name += c; }     
417                                 } 
418                                 m->gobble(in);
419                         }
420                         
421                         while(in){
422                                 char letter= in.get();
423                                 if(letter == '>'){      in.putback(letter);     break;  }
424                                 else{ scores += letter; }
425                         }
426                         
427                         m->gobble(in);
428                         
429                         if (names.count(saveName) != 0) {
430                                 wroteSomething = true;
431                                                 
432                                 out << name << endl << scores;
433                                 selectedCount++;
434                         }
435                         
436                         m->gobble(in);
437                 }
438                 in.close();
439                 out.close();
440                 
441                 
442                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
443                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
444                 
445                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your quality file."); m->mothurOutEndLine();
446
447                 
448                 return 0;
449                 
450         }
451         catch(exception& e) {
452                 m->errorOut(e, "GetSeqsCommand", "readQual");
453                 exit(1);
454         }
455 }
456 //**********************************************************************************************************************
457 int GetSeqsCommand::readList(){
458         try {
459                 string thisOutputDir = outputDir;
460                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
461                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
462                 ofstream out;
463                 m->openOutputFile(outputFileName, out);
464                 
465                 ifstream in;
466                 m->openInputFile(listfile, in);
467                 
468                 bool wroteSomething = false;
469                 int selectedCount = 0;
470                 
471                 while(!in.eof()){
472                         
473                         selectedCount = 0;
474                         
475                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
476
477                         //read in list vector
478                         ListVector list(in);
479                         
480                         //make a new list vector
481                         ListVector newList;
482                         newList.setLabel(list.getLabel());
483                         
484                         //for each bin
485                         for (int i = 0; i < list.getNumBins(); i++) {
486                         
487                                 //parse out names that are in accnos file
488                                 string binnames = list.get(i);
489                                 
490                                 string newNames = "";
491                                 while (binnames.find_first_of(',') != -1) { 
492                                         string name = binnames.substr(0,binnames.find_first_of(','));
493                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
494                                         
495                                         //if that name is in the .accnos file, add it
496                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; }
497                                 }
498                         
499                                 //get last name
500                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++; }
501
502                                 //if there are names in this bin add to new list
503                                 if (newNames != "") { 
504                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
505                                         newList.push_back(newNames);    
506                                 }
507                         }
508                                 
509                         //print new listvector
510                         if (newList.getNumBins() != 0) {
511                                 wroteSomething = true;
512                                 newList.print(out);
513                         }
514                         
515                         m->gobble(in);
516                 }
517                 in.close();     
518                 out.close();
519                 
520                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
521                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
522                 
523                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
524                 
525                 return 0;
526
527         }
528         catch(exception& e) {
529                 m->errorOut(e, "GetSeqsCommand", "readList");
530                 exit(1);
531         }
532 }
533 //**********************************************************************************************************************
534 int GetSeqsCommand::readName(){
535         try {
536                 string thisOutputDir = outputDir;
537                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
538                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" +  m->getExtension(namefile);
539                 ofstream out;
540                 m->openOutputFile(outputFileName, out);
541                 
542
543                 ifstream in;
544                 m->openInputFile(namefile, in);
545                 string name, firstCol, secondCol;
546                 
547                 bool wroteSomething = false;
548                 int selectedCount = 0;
549                 
550                 while(!in.eof()){
551                 
552                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
553
554                         in >> firstCol;                         
555                         in >> secondCol;
556                         
557                         string hold = "";
558                         if (dups) { hold = secondCol; }
559                         
560                         vector<string> parsedNames;
561                         m->splitAtComma(secondCol, parsedNames);
562                         
563                         vector<string> validSecond;
564                         for (int i = 0; i < parsedNames.size(); i++) {
565                                 if (names.count(parsedNames[i]) != 0) {
566                                         validSecond.push_back(parsedNames[i]);
567                                 }
568                         }
569
570                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
571                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
572                                 out << firstCol << '\t' << hold << endl;
573                                 wroteSomething = true;
574                                 selectedCount += parsedNames.size();
575                         }else {
576                                 selectedCount += validSecond.size();
577                                 
578                                 //if the name in the first column is in the set then print it and any other names in second column also in set
579                                 if (names.count(firstCol) != 0) {
580                                 
581                                         wroteSomething = true;
582                                         
583                                         out << firstCol << '\t';
584                                         
585                                         //you know you have at least one valid second since first column is valid
586                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
587                                         out << validSecond[validSecond.size()-1] << endl;
588                                         
589                                 
590                                 //make first name in set you come to first column and then add the remaining names to second column
591                                 }else {
592                                         //you want part of this row
593                                         if (validSecond.size() != 0) {
594                                         
595                                                 wroteSomething = true;
596                                                 
597                                                 out << validSecond[0] << '\t';
598                                         
599                                                 //you know you have at least one valid second since first column is valid
600                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
601                                                 out << validSecond[validSecond.size()-1] << endl;
602                                         }
603                                 }
604                         }
605                         m->gobble(in);
606                 }
607                 in.close();
608                 out.close();
609                 
610                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
611                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
612                 
613                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
614                 
615                 return 0;
616                 
617         }
618         catch(exception& e) {
619                 m->errorOut(e, "GetSeqsCommand", "readName");
620                 exit(1);
621         }
622 }
623
624 //**********************************************************************************************************************
625 int GetSeqsCommand::readGroup(){
626         try {
627                 string thisOutputDir = outputDir;
628                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
629                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
630                 ofstream out;
631                 m->openOutputFile(outputFileName, out);
632                 
633
634                 ifstream in;
635                 m->openInputFile(groupfile, in);
636                 string name, group;
637                 
638                 bool wroteSomething = false;
639                 int selectedCount = 0;
640                 
641                 while(!in.eof()){
642
643                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
644
645
646                         in >> name;                             //read from first column
647                         in >> group;                    //read from second column
648                         
649                         //if this name is in the accnos file
650                         if (names.count(name) != 0) {
651                                 wroteSomething = true;
652                                 
653                                 out << name << '\t' << group << endl;
654                                 selectedCount++;
655                         }
656                                         
657                         m->gobble(in);
658                 }
659                 in.close();
660                 out.close();
661                 
662                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
663                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
664                 
665                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
666
667                 
668                 return 0;
669
670         }
671         catch(exception& e) {
672                 m->errorOut(e, "GetSeqsCommand", "readGroup");
673                 exit(1);
674         }
675 }
676 //**********************************************************************************************************************
677 int GetSeqsCommand::readTax(){
678         try {
679                 string thisOutputDir = outputDir;
680                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
681                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
682                 ofstream out;
683                 m->openOutputFile(outputFileName, out);
684                 
685                 ifstream in;
686                 m->openInputFile(taxfile, in);
687                 string name, tax;
688                 
689                 bool wroteSomething = false;
690                 int selectedCount = 0;
691                 
692                 while(!in.eof()){
693
694                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
695
696                         in >> name;                             //read from first column
697                         in >> tax;                      //read from second column
698                         
699                         //if this name is in the accnos file
700                         if (names.count(name) != 0) {
701                                 wroteSomething = true;
702                                 
703                                 out << name << '\t' << tax << endl;
704                                 selectedCount++;
705                         }
706                                         
707                         m->gobble(in);
708                 }
709                 in.close();
710                 out.close();
711                 
712                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
713                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
714                 
715                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
716                         
717                 return 0;
718
719         }
720         catch(exception& e) {
721                 m->errorOut(e, "GetSeqsCommand", "readTax");
722                 exit(1);
723         }
724 }
725 //**********************************************************************************************************************
726 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
727 int GetSeqsCommand::readAlign(){
728         try {
729                 string thisOutputDir = outputDir;
730                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
731                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
732                 ofstream out;
733                 m->openOutputFile(outputFileName, out);
734                 
735
736                 ifstream in;
737                 m->openInputFile(alignfile, in);
738                 string name, junk;
739                 
740                 bool wroteSomething = false;
741                 int selectedCount = 0;
742                 
743                 //read column headers
744                 for (int i = 0; i < 16; i++) {  
745                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
746                         else                    {       break;                  }
747                 }
748                 out << endl;
749                 
750                 while(!in.eof()){
751                 
752                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
753
754
755                         in >> name;                             //read from first column
756                         
757                         //if this name is in the accnos file
758                         if (names.count(name) != 0) {
759                                 wroteSomething = true;
760                                 selectedCount++;
761                                 
762                                 out << name << '\t';
763                                 
764                                 //read rest
765                                 for (int i = 0; i < 15; i++) {  
766                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
767                                         else                    {       break;                  }
768                                 }
769                                 out << endl;
770                                 
771                         }else {//still read just don't do anything with it
772                                 //read rest
773                                 for (int i = 0; i < 15; i++) {  
774                                         if (!in.eof())  {       in >> junk;             }
775                                         else                    {       break;                  }
776                                 }
777                         }
778                         
779                         m->gobble(in);
780                 }
781                 in.close();
782                 out.close();
783                 
784                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
785                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
786                 
787                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
788                 
789                 return 0;
790                 
791         }
792         catch(exception& e) {
793                 m->errorOut(e, "GetSeqsCommand", "readAlign");
794                 exit(1);
795         }
796 }
797 //**********************************************************************************************************************
798
799 int GetSeqsCommand::readAccnos(){
800         try {
801                 
802                 ifstream in;
803                 m->openInputFile(accnosfile, in);
804                 string name;
805                 
806                 while(!in.eof()){
807                         in >> name;
808                                                 
809                         names.insert(name);
810                         
811                         m->gobble(in);
812                 }
813                 in.close();     
814                 
815                 return 0;
816
817         }
818         catch(exception& e) {
819                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
820                 exit(1);
821         }
822 }
823 //**********************************************************************************************************************
824
825 int GetSeqsCommand::compareAccnos(){
826         try {
827                 
828                 string thisOutputDir = outputDir;
829                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
830                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + "accnos.report";
831                 ofstream out;
832                 m->openOutputFile(outputFileName, out);
833                 
834                 ifstream in;
835                 m->openInputFile(accnosfile2, in);
836                 string name;
837                 
838                 set<string> namesAccnos2;
839                 set<string> namesDups;
840                 set<string> namesAccnos = names;
841                 
842                 map<string, int> nameCount;
843                 
844                 if (namefile != "") {
845                         ifstream inName;
846                         m->openInputFile(namefile, inName);
847                         
848                         
849                         while(!inName.eof()){
850                                 
851                                 if (m->control_pressed) { inName.close(); return 0; }
852                                 
853                                 string thisname, repnames;
854                                 
855                                 inName >> thisname;             m->gobble(inName);              //read from first column
856                                 inName >> repnames;                     //read from second column
857                                 
858                                 int num = m->getNumNames(repnames);
859                                 nameCount[thisname] = num;
860                                 
861                                 m->gobble(inName);
862                         }
863                         inName.close(); 
864                 }
865                 
866                 while(!in.eof()){
867                         in >> name;
868                         
869                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
870                                 int pos = name.find_last_of('_');
871                                 string tempName = name;
872                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
873                                 if (namesAccnos.count(tempName) == 0){
874                                         namesAccnos2.insert(name);
875                                 }else { //you are in both so erase
876                                         namesAccnos.erase(name);
877                                         namesDups.insert(name);
878                                 }
879                         }else { //you are in both so erase
880                                 namesAccnos.erase(name);
881                                 namesDups.insert(name);
882                         }
883                         
884                         m->gobble(in);
885                 }
886                 in.close();     
887                 
888                 out << "Names in both files : " + toString(namesDups.size()) << endl;
889                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
890                 
891                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
892                         out << (*it);
893                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
894                         out << endl;
895                 }
896                 
897                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
898                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
899                 
900                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
901                         out << (*it);
902                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
903                         out << endl;
904                 }
905                 
906                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
907                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
908                 
909                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
910                         out << (*it);
911                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
912                         out << endl;
913                 }
914
915                 out.close(); 
916                 
917                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
918                 
919                 return 0;
920                 
921         }
922         catch(exception& e) {
923                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
924                 exit(1);
925         }
926 }
927
928
929 //**********************************************************************************************************************
930