]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetSeqsCommand::setParameters(){ 
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
24                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos2);
29
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "GetSeqsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string GetSeqsCommand::getHelpString(){ 
41         try {
42                 string helpString = "";
43                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy, quality or alignreport file.\n";
44                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
45                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
46                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
48                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
49                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57
58 //**********************************************************************************************************************
59 GetSeqsCommand::GetSeqsCommand(){       
60         try {
61                 abort = true; calledHelp = true;
62                 setParameters();
63                 vector<string> tempOutNames;
64                 outputTypes["fasta"] = tempOutNames;
65                 outputTypes["taxonomy"] = tempOutNames;
66                 outputTypes["name"] = tempOutNames;
67                 outputTypes["group"] = tempOutNames;
68                 outputTypes["alignreport"] = tempOutNames;
69                 outputTypes["list"] = tempOutNames;
70                 outputTypes["qfile"] = tempOutNames;
71                 outputTypes["accnosreport"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 GetSeqsCommand::GetSeqsCommand(string option)  {
80         try {
81                 abort = false; calledHelp = false;   
82                                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string,string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                         
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["fasta"] = tempOutNames;
104                         outputTypes["taxonomy"] = tempOutNames;
105                         outputTypes["name"] = tempOutNames;
106                         outputTypes["group"] = tempOutNames;
107                         outputTypes["alignreport"] = tempOutNames;
108                         outputTypes["list"] = tempOutNames;
109                         outputTypes["qfile"] = tempOutNames;
110                         outputTypes["accnosreport"] = tempOutNames;
111                         
112                         //if the user changes the output directory command factory will send this info to us in the output parameter 
113                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
114                         
115                         //if the user changes the input directory command factory will send this info to us in the output parameter 
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                                 string path;
120                                 it = parameters.find("alignreport");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
126                                 }
127                                 
128                                 it = parameters.find("fasta");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("accnos");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
142                                 }
143                                 
144                                 it = parameters.find("accnos2");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
150                                 }
151                                 
152                                 it = parameters.find("list");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167                                 
168                                 it = parameters.find("group");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
174                                 }
175                                 
176                                 it = parameters.find("taxonomy");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
182                                 }
183                                 
184                                 it = parameters.find("qfile");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
190                                 }
191                         }
192
193                         
194                         //check for required parameters
195                         accnosfile = validParameter.validFile(parameters, "accnos", true);
196                         if (accnosfile == "not open") { abort = true; }
197                         else if (accnosfile == "not found") {  
198                                 accnosfile = m->getAccnosFile(); 
199                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
200                                 else { 
201                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
202                                         abort = true;
203                                 } 
204                         }else { m->setAccnosFile(accnosfile); } 
205                         
206                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
207                         
208                         fastafile = validParameter.validFile(parameters, "fasta", true);
209                         if (fastafile == "not open") { abort = true; }
210                         else if (fastafile == "not found") {  fastafile = "";  }
211                         else { m->setFastaFile(fastafile); }
212                         
213                         namefile = validParameter.validFile(parameters, "name", true);
214                         if (namefile == "not open") { abort = true; }
215                         else if (namefile == "not found") {  namefile = "";  }  
216                         else { m->setNameFile(namefile); }
217                         
218                         groupfile = validParameter.validFile(parameters, "group", true);
219                         if (groupfile == "not open") { abort = true; }
220                         else if (groupfile == "not found") {  groupfile = "";  }        
221                         else { m->setGroupFile(groupfile); }
222                         
223                         alignfile = validParameter.validFile(parameters, "alignreport", true);
224                         if (alignfile == "not open") { abort = true; }
225                         else if (alignfile == "not found") {  alignfile = "";  }
226                         
227                         listfile = validParameter.validFile(parameters, "list", true);
228                         if (listfile == "not open") { abort = true; }
229                         else if (listfile == "not found") {  listfile = "";  }
230                         else { m->setListFile(listfile); }
231                         
232                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
233                         if (taxfile == "not open") { abort = true; }
234                         else if (taxfile == "not found") {  taxfile = "";  }
235                         else { m->setTaxonomyFile(taxfile); }
236                         
237                         qualfile = validParameter.validFile(parameters, "qfile", true);
238                         if (qualfile == "not open") { abort = true; }
239                         else if (qualfile == "not found") {  qualfile = "";  }
240                         else { m->setQualFile(qualfile); }
241                         
242                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
243                         if (accnosfile2 == "not open") { abort = true; }
244                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
245                         
246                         
247                         string usedDups = "true";
248                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
249                         dups = m->isTrue(temp);
250                         
251                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
252                 }
253
254         }
255         catch(exception& e) {
256                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
257                 exit(1);
258         }
259 }
260 //**********************************************************************************************************************
261
262 int GetSeqsCommand::execute(){
263         try {
264                 
265                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
266                 
267                 //get names you want to keep
268                 readAccnos();
269                 
270                 if (m->control_pressed) { return 0; }
271                 
272                 //read through the correct file and output lines you want to keep
273                 if (namefile != "")                     {               readName();                     }
274                 if (fastafile != "")            {               readFasta();            }
275                 if (groupfile != "")            {               readGroup();            }
276                 if (alignfile != "")            {               readAlign();            }
277                 if (listfile != "")                     {               readList();                     }
278                 if (taxfile != "")                      {               readTax();                      }
279                 if (qualfile != "")                     {               readQual();                     }
280                 if (accnosfile2 != "")          {               compareAccnos();        }
281                 
282                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
283                 
284                 
285                 if (outputNames.size() != 0) {
286                         m->mothurOutEndLine();
287                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
288                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
289                         m->mothurOutEndLine();
290                         
291                         //set fasta file as new current fastafile
292                         string current = "";
293                         itTypes = outputTypes.find("fasta");
294                         if (itTypes != outputTypes.end()) {
295                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
296                         }
297                         
298                         itTypes = outputTypes.find("name");
299                         if (itTypes != outputTypes.end()) {
300                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
301                         }
302                         
303                         itTypes = outputTypes.find("group");
304                         if (itTypes != outputTypes.end()) {
305                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
306                         }
307                         
308                         itTypes = outputTypes.find("list");
309                         if (itTypes != outputTypes.end()) {
310                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
311                         }
312                         
313                         itTypes = outputTypes.find("taxonomy");
314                         if (itTypes != outputTypes.end()) {
315                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
316                         }
317                         
318                         itTypes = outputTypes.find("qfile");
319                         if (itTypes != outputTypes.end()) {
320                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
321                         }
322                         
323                 }
324                 
325                 return 0;               
326         }
327
328         catch(exception& e) {
329                 m->errorOut(e, "GetSeqsCommand", "execute");
330                 exit(1);
331         }
332 }
333
334 //**********************************************************************************************************************
335 int GetSeqsCommand::readFasta(){
336         try {
337                 string thisOutputDir = outputDir;
338                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
339                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" +  m->getExtension(fastafile);
340                 ofstream out;
341                 m->openOutputFile(outputFileName, out);
342                 
343                 
344                 ifstream in;
345                 m->openInputFile(fastafile, in);
346                 string name;
347                 
348                 bool wroteSomething = false;
349                 int selectedCount = 0;
350                 
351                 while(!in.eof()){
352                 
353                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
354                         
355                         Sequence currSeq(in);
356                         name = currSeq.getName();
357                         
358                         if (name != "") {
359                                 //if this name is in the accnos file
360                                 if (names.count(name) != 0) {
361                                         wroteSomething = true;
362                                         
363                                         currSeq.printSequence(out);
364                                         selectedCount++;
365                                 }
366                         }
367                         m->gobble(in);
368                 }
369                 in.close();     
370                 out.close();
371                 
372                 
373                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
374                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
375                 
376                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
377                 
378                 return 0;
379
380         }
381         catch(exception& e) {
382                 m->errorOut(e, "GetSeqsCommand", "readFasta");
383                 exit(1);
384         }
385 }
386 //**********************************************************************************************************************
387 int GetSeqsCommand::readQual(){
388         try {
389                 string thisOutputDir = outputDir;
390                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
391                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + "pick" +  m->getExtension(qualfile);
392                 ofstream out;
393                 m->openOutputFile(outputFileName, out);
394                 
395                 
396                 ifstream in;
397                 m->openInputFile(qualfile, in);
398                 string name;
399                 
400                 bool wroteSomething = false;
401                 int selectedCount = 0;
402                 
403                 
404                 while(!in.eof()){       
405                         string saveName = "";
406                         string name = "";
407                         string scores = "";
408                         
409                         in >> name; 
410                                 
411                         if (name.length() != 0) { 
412                                 saveName = name.substr(1);
413                                 while (!in.eof())       {       
414                                         char c = in.get(); 
415                                         if (c == 10 || c == 13){        break;  }
416                                         else { name += c; }     
417                                 } 
418                                 m->gobble(in);
419                         }
420                         
421                         while(in){
422                                 char letter= in.get();
423                                 if(letter == '>'){      in.putback(letter);     break;  }
424                                 else{ scores += letter; }
425                         }
426                         
427                         m->gobble(in);
428                         
429                         if (names.count(saveName) != 0) {
430                                 wroteSomething = true;
431                                                 
432                                 out << name << endl << scores;
433                                 selectedCount++;
434                         }
435                         
436                         m->gobble(in);
437                 }
438                 in.close();
439                 out.close();
440                 
441                 
442                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
443                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
444                 
445                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your quality file."); m->mothurOutEndLine();
446
447                 
448                 return 0;
449                 
450         }
451         catch(exception& e) {
452                 m->errorOut(e, "GetSeqsCommand", "readQual");
453                 exit(1);
454         }
455 }
456 //**********************************************************************************************************************
457 int GetSeqsCommand::readList(){
458         try {
459                 string thisOutputDir = outputDir;
460                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
461                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
462                 ofstream out;
463                 m->openOutputFile(outputFileName, out);
464                 
465                 ifstream in;
466                 m->openInputFile(listfile, in);
467                 
468                 bool wroteSomething = false;
469                 int selectedCount = 0;
470                 
471                 while(!in.eof()){
472                         
473                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
474
475                         //read in list vector
476                         ListVector list(in);
477                         
478                         //make a new list vector
479                         ListVector newList;
480                         newList.setLabel(list.getLabel());
481                         
482                         //for each bin
483                         for (int i = 0; i < list.getNumBins(); i++) {
484                         
485                                 //parse out names that are in accnos file
486                                 string binnames = list.get(i);
487                                 
488                                 string newNames = "";
489                                 while (binnames.find_first_of(',') != -1) { 
490                                         string name = binnames.substr(0,binnames.find_first_of(','));
491                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
492                                         
493                                         //if that name is in the .accnos file, add it
494                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; }
495                                 }
496                         
497                                 //get last name
498                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++; }
499
500                                 //if there are names in this bin add to new list
501                                 if (newNames != "") { 
502                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
503                                         newList.push_back(newNames);    
504                                 }
505                         }
506                                 
507                         //print new listvector
508                         if (newList.getNumBins() != 0) {
509                                 wroteSomething = true;
510                                 newList.print(out);
511                         }
512                         
513                         m->gobble(in);
514                 }
515                 in.close();     
516                 out.close();
517                 
518                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
519                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
520                 
521                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
522                 
523                 return 0;
524
525         }
526         catch(exception& e) {
527                 m->errorOut(e, "GetSeqsCommand", "readList");
528                 exit(1);
529         }
530 }
531 //**********************************************************************************************************************
532 int GetSeqsCommand::readName(){
533         try {
534                 string thisOutputDir = outputDir;
535                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
536                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" +  m->getExtension(namefile);
537                 ofstream out;
538                 m->openOutputFile(outputFileName, out);
539                 
540
541                 ifstream in;
542                 m->openInputFile(namefile, in);
543                 string name, firstCol, secondCol;
544                 
545                 bool wroteSomething = false;
546                 int selectedCount = 0;
547                 
548                 while(!in.eof()){
549                 
550                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
551
552                         in >> firstCol;                         
553                         in >> secondCol;
554                         
555                         string hold = "";
556                         if (dups) { hold = secondCol; }
557                         
558                         vector<string> parsedNames;
559                         m->splitAtComma(secondCol, parsedNames);
560                         
561                         vector<string> validSecond;
562                         for (int i = 0; i < parsedNames.size(); i++) {
563                                 if (names.count(parsedNames[i]) != 0) {
564                                         validSecond.push_back(parsedNames[i]);
565                                 }
566                         }
567
568                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
569                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
570                                 out << firstCol << '\t' << hold << endl;
571                                 wroteSomething = true;
572                                 selectedCount += parsedNames.size();
573                         }else {
574                                 selectedCount += validSecond.size();
575                                 
576                                 //if the name in the first column is in the set then print it and any other names in second column also in set
577                                 if (names.count(firstCol) != 0) {
578                                 
579                                         wroteSomething = true;
580                                         
581                                         out << firstCol << '\t';
582                                         
583                                         //you know you have at least one valid second since first column is valid
584                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
585                                         out << validSecond[validSecond.size()-1] << endl;
586                                         
587                                 
588                                 //make first name in set you come to first column and then add the remaining names to second column
589                                 }else {
590                                         //you want part of this row
591                                         if (validSecond.size() != 0) {
592                                         
593                                                 wroteSomething = true;
594                                                 
595                                                 out << validSecond[0] << '\t';
596                                         
597                                                 //you know you have at least one valid second since first column is valid
598                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
599                                                 out << validSecond[validSecond.size()-1] << endl;
600                                         }
601                                 }
602                         }
603                         m->gobble(in);
604                 }
605                 in.close();
606                 out.close();
607                 
608                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
609                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
610                 
611                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
612                 
613                 return 0;
614                 
615         }
616         catch(exception& e) {
617                 m->errorOut(e, "GetSeqsCommand", "readName");
618                 exit(1);
619         }
620 }
621
622 //**********************************************************************************************************************
623 int GetSeqsCommand::readGroup(){
624         try {
625                 string thisOutputDir = outputDir;
626                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
627                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
628                 ofstream out;
629                 m->openOutputFile(outputFileName, out);
630                 
631
632                 ifstream in;
633                 m->openInputFile(groupfile, in);
634                 string name, group;
635                 
636                 bool wroteSomething = false;
637                 int selectedCount = 0;
638                 
639                 while(!in.eof()){
640
641                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
642
643
644                         in >> name;                             //read from first column
645                         in >> group;                    //read from second column
646                         
647                         //if this name is in the accnos file
648                         if (names.count(name) != 0) {
649                                 wroteSomething = true;
650                                 
651                                 out << name << '\t' << group << endl;
652                                 selectedCount++;
653                         }
654                                         
655                         m->gobble(in);
656                 }
657                 in.close();
658                 out.close();
659                 
660                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
661                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
662                 
663                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
664
665                 
666                 return 0;
667
668         }
669         catch(exception& e) {
670                 m->errorOut(e, "GetSeqsCommand", "readGroup");
671                 exit(1);
672         }
673 }
674 //**********************************************************************************************************************
675 int GetSeqsCommand::readTax(){
676         try {
677                 string thisOutputDir = outputDir;
678                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
679                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
680                 ofstream out;
681                 m->openOutputFile(outputFileName, out);
682                 
683                 ifstream in;
684                 m->openInputFile(taxfile, in);
685                 string name, tax;
686                 
687                 bool wroteSomething = false;
688                 int selectedCount = 0;
689                 
690                 while(!in.eof()){
691
692                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
693
694                         in >> name;                             //read from first column
695                         in >> tax;                      //read from second column
696                         
697                         //if this name is in the accnos file
698                         if (names.count(name) != 0) {
699                                 wroteSomething = true;
700                                 
701                                 out << name << '\t' << tax << endl;
702                                 selectedCount++;
703                         }
704                                         
705                         m->gobble(in);
706                 }
707                 in.close();
708                 out.close();
709                 
710                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
711                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
712                 
713                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
714                         
715                 return 0;
716
717         }
718         catch(exception& e) {
719                 m->errorOut(e, "GetSeqsCommand", "readTax");
720                 exit(1);
721         }
722 }
723 //**********************************************************************************************************************
724 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
725 int GetSeqsCommand::readAlign(){
726         try {
727                 string thisOutputDir = outputDir;
728                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
729                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
730                 ofstream out;
731                 m->openOutputFile(outputFileName, out);
732                 
733
734                 ifstream in;
735                 m->openInputFile(alignfile, in);
736                 string name, junk;
737                 
738                 bool wroteSomething = false;
739                 int selectedCount = 0;
740                 
741                 //read column headers
742                 for (int i = 0; i < 16; i++) {  
743                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
744                         else                    {       break;                  }
745                 }
746                 out << endl;
747                 
748                 while(!in.eof()){
749                 
750                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
751
752
753                         in >> name;                             //read from first column
754                         
755                         //if this name is in the accnos file
756                         if (names.count(name) != 0) {
757                                 wroteSomething = true;
758                                 selectedCount++;
759                                 
760                                 out << name << '\t';
761                                 
762                                 //read rest
763                                 for (int i = 0; i < 15; i++) {  
764                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
765                                         else                    {       break;                  }
766                                 }
767                                 out << endl;
768                                 
769                         }else {//still read just don't do anything with it
770                                 //read rest
771                                 for (int i = 0; i < 15; i++) {  
772                                         if (!in.eof())  {       in >> junk;             }
773                                         else                    {       break;                  }
774                                 }
775                         }
776                         
777                         m->gobble(in);
778                 }
779                 in.close();
780                 out.close();
781                 
782                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
783                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
784                 
785                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
786                 
787                 return 0;
788                 
789         }
790         catch(exception& e) {
791                 m->errorOut(e, "GetSeqsCommand", "readAlign");
792                 exit(1);
793         }
794 }
795 //**********************************************************************************************************************
796
797 int GetSeqsCommand::readAccnos(){
798         try {
799                 
800                 ifstream in;
801                 m->openInputFile(accnosfile, in);
802                 string name;
803                 
804                 while(!in.eof()){
805                         in >> name;
806                                                 
807                         names.insert(name);
808                         
809                         m->gobble(in);
810                 }
811                 in.close();     
812                 
813                 return 0;
814
815         }
816         catch(exception& e) {
817                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
818                 exit(1);
819         }
820 }
821 //**********************************************************************************************************************
822
823 int GetSeqsCommand::compareAccnos(){
824         try {
825                 
826                 string thisOutputDir = outputDir;
827                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
828                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + "accnos.report";
829                 ofstream out;
830                 m->openOutputFile(outputFileName, out);
831                 
832                 ifstream in;
833                 m->openInputFile(accnosfile2, in);
834                 string name;
835                 
836                 set<string> namesAccnos2;
837                 set<string> namesDups;
838                 set<string> namesAccnos = names;
839                 
840                 map<string, int> nameCount;
841                 
842                 if (namefile != "") {
843                         ifstream inName;
844                         m->openInputFile(namefile, inName);
845                         
846                         
847                         while(!inName.eof()){
848                                 
849                                 if (m->control_pressed) { inName.close(); return 0; }
850                                 
851                                 string thisname, repnames;
852                                 
853                                 inName >> thisname;             m->gobble(inName);              //read from first column
854                                 inName >> repnames;                     //read from second column
855                                 
856                                 int num = m->getNumNames(repnames);
857                                 nameCount[thisname] = num;
858                                 
859                                 m->gobble(inName);
860                         }
861                         inName.close(); 
862                 }
863                 
864                 while(!in.eof()){
865                         in >> name;
866                         
867                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
868                                 int pos = name.find_last_of('_');
869                                 string tempName = name;
870                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
871                                 if (namesAccnos.count(tempName) == 0){
872                                         namesAccnos2.insert(name);
873                                 }else { //you are in both so erase
874                                         namesAccnos.erase(name);
875                                         namesDups.insert(name);
876                                 }
877                         }else { //you are in both so erase
878                                 namesAccnos.erase(name);
879                                 namesDups.insert(name);
880                         }
881                         
882                         m->gobble(in);
883                 }
884                 in.close();     
885                 
886                 out << "Names in both files : " + toString(namesDups.size()) << endl;
887                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
888                 
889                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
890                         out << (*it);
891                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
892                         out << endl;
893                 }
894                 
895                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
896                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
897                 
898                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
899                         out << (*it);
900                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
901                         out << endl;
902                 }
903                 
904                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
905                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
906                 
907                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
908                         out << (*it);
909                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
910                         out << endl;
911                 }
912
913                 out.close(); 
914                 
915                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
916                 
917                 return 0;
918                 
919         }
920         catch(exception& e) {
921                 m->errorOut(e, "GetSeqsCommand", "readAccnos");
922                 exit(1);
923         }
924 }
925
926
927 //**********************************************************************************************************************
928