]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
e0faef4cc07bd3fc6626fe186ec67fe05c512023
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetSeqsCommand::setParameters(){ 
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
19         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
21                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
23                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
24                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
25                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
26                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos2);
30
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "GetSeqsCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string GetSeqsCommand::getHelpString(){ 
42         try {
43                 string helpString = "";
44                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
45                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
46                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
47                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
48                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
49                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
50                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
51                 return helpString;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
55                 exit(1);
56         }
57 }
58
59 //**********************************************************************************************************************
60 GetSeqsCommand::GetSeqsCommand(){       
61         try {
62                 abort = true; calledHelp = true;
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["fasta"] = tempOutNames;
66                 outputTypes["taxonomy"] = tempOutNames;
67                 outputTypes["name"] = tempOutNames;
68                 outputTypes["group"] = tempOutNames;
69                 outputTypes["alignreport"] = tempOutNames;
70                 outputTypes["list"] = tempOutNames;
71                 outputTypes["qfile"] = tempOutNames;
72         outputTypes["count"] = tempOutNames;
73                 outputTypes["accnosreport"] = tempOutNames;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
77                 exit(1);
78         }
79 }
80 //**********************************************************************************************************************
81 string GetSeqsCommand::getOutputFileNameTag(string type, string inputName=""){  
82         try {
83         string outputFileName = "";
84                 map<string, vector<string> >::iterator it;
85         
86         //is this a type this command creates
87         it = outputTypes.find(type);
88         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
89         else {
90             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
91             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
92             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
93             else if (type == "count")       {   outputFileName =  "pick.count.table";   }
94             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
95             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
96             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
97             else if (type == "accnosreport"){   outputFileName =  "accnos.report";                       }
98             else if (type == "alignreport") {   outputFileName =  "pick.align.report";                   }
99             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
100         }
101         return outputFileName;
102         }
103         catch(exception& e) {
104                 m->errorOut(e, "GetSeqsCommand", "getOutputFileNameTag");
105                 exit(1);
106         }
107 }
108 //**********************************************************************************************************************
109 GetSeqsCommand::GetSeqsCommand(string option)  {
110         try {
111                 abort = false; calledHelp = false;   
112                                 
113                 //allow user to run help
114                 if(option == "help") { help(); abort = true; calledHelp = true; }
115                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
116                 
117                 else {
118                         vector<string> myArray = setParameters();
119                         
120                         OptionParser parser(option);
121                         map<string,string> parameters = parser.getParameters();
122                         
123                         ValidParameters validParameter;
124                         map<string,string>::iterator it;
125                         
126                         //check to make sure all parameters are valid for command
127                         for (it = parameters.begin(); it != parameters.end(); it++) { 
128                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
129                         }
130                         
131                         //initialize outputTypes
132                         vector<string> tempOutNames;
133                         outputTypes["fasta"] = tempOutNames;
134                         outputTypes["taxonomy"] = tempOutNames;
135                         outputTypes["name"] = tempOutNames;
136                         outputTypes["group"] = tempOutNames;
137                         outputTypes["alignreport"] = tempOutNames;
138                         outputTypes["list"] = tempOutNames;
139                         outputTypes["qfile"] = tempOutNames;
140                         outputTypes["accnosreport"] = tempOutNames;
141             outputTypes["count"] = tempOutNames;
142                         
143                         //if the user changes the output directory command factory will send this info to us in the output parameter 
144                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
145                         
146                         //if the user changes the input directory command factory will send this info to us in the output parameter 
147                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
148                         if (inputDir == "not found"){   inputDir = "";          }
149                         else {
150                                 string path;
151                                 it = parameters.find("alignreport");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
157                                 }
158                                 
159                                 it = parameters.find("fasta");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
165                                 }
166                                 
167                                 it = parameters.find("accnos");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
173                                 }
174                                 
175                                 it = parameters.find("accnos2");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
181                                 }
182                                 
183                                 it = parameters.find("list");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
189                                 }
190                                 
191                                 it = parameters.find("name");
192                                 //user has given a template file
193                                 if(it != parameters.end()){ 
194                                         path = m->hasPath(it->second);
195                                         //if the user has not given a path then, add inputdir. else leave path alone.
196                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
197                                 }
198                                 
199                                 it = parameters.find("group");
200                                 //user has given a template file
201                                 if(it != parameters.end()){ 
202                                         path = m->hasPath(it->second);
203                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
205                                 }
206                                 
207                                 it = parameters.find("taxonomy");
208                                 //user has given a template file
209                                 if(it != parameters.end()){ 
210                                         path = m->hasPath(it->second);
211                                         //if the user has not given a path then, add inputdir. else leave path alone.
212                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
213                                 }
214                                 
215                                 it = parameters.find("qfile");
216                                 //user has given a template file
217                                 if(it != parameters.end()){ 
218                                         path = m->hasPath(it->second);
219                                         //if the user has not given a path then, add inputdir. else leave path alone.
220                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
221                                 }
222                 
223                 it = parameters.find("count");
224                                 //user has given a template file
225                                 if(it != parameters.end()){ 
226                                         path = m->hasPath(it->second);
227                                         //if the user has not given a path then, add inputdir. else leave path alone.
228                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
229                                 }
230                         }
231
232                         
233                         //check for required parameters
234                         accnosfile = validParameter.validFile(parameters, "accnos", true);
235                         if (accnosfile == "not open") { abort = true; }
236                         else if (accnosfile == "not found") {  
237                                 accnosfile = m->getAccnosFile(); 
238                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
239                                 else { 
240                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
241                                         abort = true;
242                                 } 
243                         }else { m->setAccnosFile(accnosfile); } 
244                         
245                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
246                         
247                         fastafile = validParameter.validFile(parameters, "fasta", true);
248                         if (fastafile == "not open") { fastafile = ""; abort = true; }
249                         else if (fastafile == "not found") {  fastafile = "";  }
250                         else { m->setFastaFile(fastafile); }
251                         
252                         namefile = validParameter.validFile(parameters, "name", true);
253                         if (namefile == "not open") { namefile = ""; abort = true; }
254                         else if (namefile == "not found") {  namefile = "";  }  
255                         else { m->setNameFile(namefile); }
256                         
257                         groupfile = validParameter.validFile(parameters, "group", true);
258                         if (groupfile == "not open") { abort = true; }
259                         else if (groupfile == "not found") {  groupfile = "";  }        
260                         else { m->setGroupFile(groupfile); }
261                         
262                         alignfile = validParameter.validFile(parameters, "alignreport", true);
263                         if (alignfile == "not open") { abort = true; }
264                         else if (alignfile == "not found") {  alignfile = "";  }
265                         
266                         listfile = validParameter.validFile(parameters, "list", true);
267                         if (listfile == "not open") { abort = true; }
268                         else if (listfile == "not found") {  listfile = "";  }
269                         else { m->setListFile(listfile); }
270                         
271                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
272                         if (taxfile == "not open") { taxfile = ""; abort = true; }
273                         else if (taxfile == "not found") {  taxfile = "";  }
274                         else { m->setTaxonomyFile(taxfile); }
275                         
276                         qualfile = validParameter.validFile(parameters, "qfile", true);
277                         if (qualfile == "not open") { abort = true; }
278                         else if (qualfile == "not found") {  qualfile = "";  }
279                         else { m->setQualFile(qualfile); }
280                         
281                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
282                         if (accnosfile2 == "not open") { abort = true; }
283                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
284                         
285             countfile = validParameter.validFile(parameters, "count", true);
286             if (countfile == "not open") { countfile = ""; abort = true; }
287             else if (countfile == "not found") { countfile = "";  }     
288             else { m->setCountTableFile(countfile); }
289             
290             if ((namefile != "") && (countfile != "")) {
291                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
292             }
293             
294             if ((groupfile != "") && (countfile != "")) {
295                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
296             }
297
298                         
299                         string usedDups = "true";
300                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
301                         dups = m->isTrue(temp);
302                         
303                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
304             
305             if (countfile == "") {
306                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
307                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
308                     parser.getNameFile(files);
309                 }
310             }
311                 }
312
313         }
314         catch(exception& e) {
315                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
316                 exit(1);
317         }
318 }
319 //**********************************************************************************************************************
320
321 int GetSeqsCommand::execute(){
322         try {
323                 
324                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
325                 
326                 //get names you want to keep
327                 names = m->readAccnos(accnosfile);
328                 
329                 if (m->control_pressed) { return 0; }
330         
331         if (countfile != "") {
332             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
333                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
334             }
335         }
336                 
337                 //read through the correct file and output lines you want to keep
338                 if (namefile != "")                     {               readName();                     }
339                 if (fastafile != "")            {               readFasta();            }
340                 if (groupfile != "")            {               readGroup();            }
341         if (countfile != "")            {               readCount();            }
342                 if (alignfile != "")            {               readAlign();            }
343                 if (listfile != "")                     {               readList();                     }
344                 if (taxfile != "")                      {               readTax();                      }
345                 if (qualfile != "")                     {               readQual();                     }
346                 if (accnosfile2 != "")          {               compareAccnos();        }
347         
348         if (m->debug) { runSanityCheck(); }
349                 
350                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
351                 
352                 
353                 if (outputNames.size() != 0) {
354                         m->mothurOutEndLine();
355                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
356                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
357                         m->mothurOutEndLine();
358                         
359                         //set fasta file as new current fastafile
360                         string current = "";
361                         itTypes = outputTypes.find("fasta");
362                         if (itTypes != outputTypes.end()) {
363                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
364                         }
365                         
366                         itTypes = outputTypes.find("name");
367                         if (itTypes != outputTypes.end()) {
368                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
369                         }
370                         
371                         itTypes = outputTypes.find("group");
372                         if (itTypes != outputTypes.end()) {
373                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
374                         }
375                         
376                         itTypes = outputTypes.find("list");
377                         if (itTypes != outputTypes.end()) {
378                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
379                         }
380                         
381                         itTypes = outputTypes.find("taxonomy");
382                         if (itTypes != outputTypes.end()) {
383                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
384                         }
385                         
386                         itTypes = outputTypes.find("qfile");
387                         if (itTypes != outputTypes.end()) {
388                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
389                         }
390                         
391             itTypes = outputTypes.find("count");
392                         if (itTypes != outputTypes.end()) {
393                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
394                         }
395                 }
396                 
397                 return 0;               
398         }
399
400         catch(exception& e) {
401                 m->errorOut(e, "GetSeqsCommand", "execute");
402                 exit(1);
403         }
404 }
405
406 //**********************************************************************************************************************
407 int GetSeqsCommand::readFasta(){
408         try {
409                 string thisOutputDir = outputDir;
410                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
411                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
412                 ofstream out;
413                 m->openOutputFile(outputFileName, out);
414                 
415                 
416                 ifstream in;
417                 m->openInputFile(fastafile, in);
418                 string name;
419                 
420                 bool wroteSomething = false;
421                 int selectedCount = 0;
422         
423         if (m->debug) { set<string> temp; sanity["fasta"] = temp; }
424                 
425                 while(!in.eof()){
426                 
427                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
428                         
429                         Sequence currSeq(in);
430                         name = currSeq.getName();
431                         
432                         if (name != "") {
433                                 //if this name is in the accnos file
434                                 if (names.count(name) != 0) {
435                                         wroteSomething = true;
436                                         
437                                         currSeq.printSequence(out);
438                                         selectedCount++;
439                     
440                     if (m->debug) { sanity["fasta"].insert(name); }
441                                 }
442                         }
443                         m->gobble(in);
444                 }
445                 in.close();     
446                 out.close();
447                 
448                 
449                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
450                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
451                 
452                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
453                 
454                 return 0;
455
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "GetSeqsCommand", "readFasta");
459                 exit(1);
460         }
461 }
462 //**********************************************************************************************************************
463 int GetSeqsCommand::readQual(){
464         try {
465                 string thisOutputDir = outputDir;
466                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
467                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
468                 ofstream out;
469                 m->openOutputFile(outputFileName, out);
470                 
471                 
472                 ifstream in;
473                 m->openInputFile(qualfile, in);
474                 string name;
475                 
476                 bool wroteSomething = false;
477                 int selectedCount = 0;
478                 
479         if (m->debug) { set<string> temp; sanity["qual"] = temp; }
480                 
481                 while(!in.eof()){       
482                         string saveName = "";
483                         string name = "";
484                         string scores = "";
485                         
486                         in >> name; 
487                                 
488                         if (name.length() != 0) { 
489                                 saveName = name.substr(1);
490                                 while (!in.eof())       {       
491                                         char c = in.get(); 
492                                         if (c == 10 || c == 13){        break;  }
493                                         else { name += c; }     
494                                 } 
495                                 m->gobble(in);
496                         }
497                         
498                         while(in){
499                                 char letter= in.get();
500                                 if(letter == '>'){      in.putback(letter);     break;  }
501                                 else{ scores += letter; }
502                         }
503                         
504                         m->gobble(in);
505                         
506                         if (names.count(saveName) != 0) {
507                                 wroteSomething = true;
508                                                 
509                                 out << name << endl << scores;
510                                 selectedCount++;
511                 if (m->debug) { sanity["qual"].insert(name); }
512                         }
513                         
514                         m->gobble(in);
515                 }
516                 in.close();
517                 out.close();
518                 
519                 
520                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
521                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
522                 
523                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your quality file."); m->mothurOutEndLine();
524
525                 
526                 return 0;
527                 
528         }
529         catch(exception& e) {
530                 m->errorOut(e, "GetSeqsCommand", "readQual");
531                 exit(1);
532         }
533 }
534 //**********************************************************************************************************************
535 int GetSeqsCommand::readCount(){
536         try {
537                 string thisOutputDir = outputDir;
538                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
539                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
540                 
541                 ofstream out;
542                 m->openOutputFile(outputFileName, out);
543                 
544                 ifstream in;
545                 m->openInputFile(countfile, in);
546                 
547                 bool wroteSomething = false;
548                 int selectedCount = 0;
549                 
550         string headers = m->getline(in); m->gobble(in);
551         out << headers << endl;
552         
553         string name, rest; int thisTotal;
554         while (!in.eof()) {
555             
556             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
557             
558             in >> name; m->gobble(in); 
559             in >> thisTotal; m->gobble(in);
560             rest = m->getline(in); m->gobble(in);
561             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
562             
563             if (names.count(name) != 0) {
564                 out << name << '\t' << thisTotal << '\t' << rest << endl;
565                 wroteSomething = true;
566                 selectedCount+= thisTotal;
567             }
568         }
569         in.close();
570                 out.close();
571                 
572                 if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
573                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
574                 
575                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
576         
577                 return 0;
578         }
579         catch(exception& e) {
580                 m->errorOut(e, "GetSeqsCommand", "readCount");
581                 exit(1);
582         }
583 }
584
585 //**********************************************************************************************************************
586 int GetSeqsCommand::readList(){
587         try {
588                 string thisOutputDir = outputDir;
589                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
590                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
591                 ofstream out;
592                 m->openOutputFile(outputFileName, out);
593                 
594                 ifstream in;
595                 m->openInputFile(listfile, in);
596                 
597                 bool wroteSomething = false;
598                 int selectedCount = 0;
599         
600         if (m->debug) { set<string> temp; sanity["list"] = temp; }
601                 
602                 while(!in.eof()){
603                         
604                         selectedCount = 0;
605                         
606                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
607
608                         //read in list vector
609                         ListVector list(in);
610                         
611                         //make a new list vector
612                         ListVector newList;
613                         newList.setLabel(list.getLabel());
614                         
615                         //for each bin
616                         for (int i = 0; i < list.getNumBins(); i++) {
617                         
618                                 //parse out names that are in accnos file
619                                 string binnames = list.get(i);
620                                 
621                                 string newNames = "";
622                                 while (binnames.find_first_of(',') != -1) { 
623                                         string name = binnames.substr(0,binnames.find_first_of(','));
624                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
625                                         
626                                         //if that name is in the .accnos file, add it
627                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
628                                 }
629                         
630                                 //get last name
631                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++;  if (m->debug) { sanity["list"].insert(binnames); } }
632
633                                 //if there are names in this bin add to new list
634                                 if (newNames != "") { 
635                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
636                                         newList.push_back(newNames);    
637                                 }
638                         }
639                                 
640                         //print new listvector
641                         if (newList.getNumBins() != 0) {
642                                 wroteSomething = true;
643                                 newList.print(out);
644                         }
645                         
646                         m->gobble(in);
647                 }
648                 in.close();     
649                 out.close();
650                 
651                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
652                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
653                 
654                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
655                 
656                 return 0;
657
658         }
659         catch(exception& e) {
660                 m->errorOut(e, "GetSeqsCommand", "readList");
661                 exit(1);
662         }
663 }
664 //**********************************************************************************************************************
665 int GetSeqsCommand::readName(){
666         try {
667                 string thisOutputDir = outputDir;
668                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
669                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
670                 ofstream out;
671                 m->openOutputFile(outputFileName, out);
672                 
673
674                 ifstream in;
675                 m->openInputFile(namefile, in);
676                 string name, firstCol, secondCol;
677                 
678                 bool wroteSomething = false;
679                 int selectedCount = 0;
680         
681         if (m->debug) { set<string> temp; sanity["name"] = temp; }
682         if (m->debug) { set<string> temp; sanity["dupname"] = temp; }
683                 
684                 while(!in.eof()){
685                 
686                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
687
688                         in >> firstCol;                         
689                         in >> secondCol;
690                         
691                         string hold = "";
692                         if (dups) { hold = secondCol; }
693                         
694                         vector<string> parsedNames;
695                         m->splitAtComma(secondCol, parsedNames);
696                         
697                         vector<string> validSecond;
698                         for (int i = 0; i < parsedNames.size(); i++) {
699                                 if (names.count(parsedNames[i]) != 0) {
700                                         validSecond.push_back(parsedNames[i]);
701                     if (m->debug) { sanity["dupname"].insert(parsedNames[i]); }
702                                 }
703                         }
704
705                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
706                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]); if (m->debug) { sanity["dupname"].insert(parsedNames[i]); } }
707                                 out << firstCol << '\t' << hold << endl;
708                                 wroteSomething = true;
709                                 selectedCount += parsedNames.size();
710                 if (m->debug) { sanity["name"].insert(firstCol); }
711                         }else {
712                                 selectedCount += validSecond.size();
713                                 
714                                 //if the name in the first column is in the set then print it and any other names in second column also in set
715                                 if (names.count(firstCol) != 0) {
716                                 
717                                         wroteSomething = true;
718                                         
719                                         out << firstCol << '\t';
720                                         
721                                         //you know you have at least one valid second since first column is valid
722                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
723                                         out << validSecond[validSecond.size()-1] << endl;
724                     
725                     if (m->debug) { sanity["name"].insert(firstCol); }
726                                         
727                                 
728                                 //make first name in set you come to first column and then add the remaining names to second column
729                                 }else {
730                                         //you want part of this row
731                                         if (validSecond.size() != 0) {
732                                         
733                                                 wroteSomething = true;
734                                                 
735                                                 out << validSecond[0] << '\t';
736                                         
737                                                 //you know you have at least one valid second since first column is valid
738                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
739                                                 out << validSecond[validSecond.size()-1] << endl;
740                         
741                         if (m->debug) { sanity["name"].insert(validSecond[0]); }
742                                         }
743                                 }
744                         }
745                         m->gobble(in);
746                 }
747                 in.close();
748                 out.close();
749                 
750                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
751                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
752                 
753                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
754                 
755                 return 0;
756                 
757         }
758         catch(exception& e) {
759                 m->errorOut(e, "GetSeqsCommand", "readName");
760                 exit(1);
761         }
762 }
763
764 //**********************************************************************************************************************
765 int GetSeqsCommand::readGroup(){
766         try {
767                 string thisOutputDir = outputDir;
768                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
769                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
770                 ofstream out;
771                 m->openOutputFile(outputFileName, out);
772                 
773
774                 ifstream in;
775                 m->openInputFile(groupfile, in);
776                 string name, group;
777                 
778                 bool wroteSomething = false;
779                 int selectedCount = 0;
780         
781         if (m->debug) { set<string> temp; sanity["group"] = temp; }
782                 
783                 while(!in.eof()){
784
785                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
786
787
788                         in >> name;                             //read from first column
789                         in >> group;                    //read from second column
790                         
791                         //if this name is in the accnos file
792                         if (names.count(name) != 0) {
793                                 wroteSomething = true;
794                                 
795                                 out << name << '\t' << group << endl;
796                                 selectedCount++;
797                 
798                 if (m->debug) {  sanity["group"].insert(name); }
799                         }
800                                         
801                         m->gobble(in);
802                 }
803                 in.close();
804                 out.close();
805                 
806                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
807                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
808                 
809                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
810
811                 
812                 return 0;
813
814         }
815         catch(exception& e) {
816                 m->errorOut(e, "GetSeqsCommand", "readGroup");
817                 exit(1);
818         }
819 }
820 //**********************************************************************************************************************
821 int GetSeqsCommand::readTax(){
822         try {
823                 string thisOutputDir = outputDir;
824                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
825                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
826                 ofstream out;
827                 m->openOutputFile(outputFileName, out);
828                 
829                 ifstream in;
830                 m->openInputFile(taxfile, in);
831                 string name, tax;
832                 
833                 bool wroteSomething = false;
834                 int selectedCount = 0;
835         
836         if (m->debug) { set<string> temp; sanity["tax"] = temp; }
837                 
838                 while(!in.eof()){
839
840                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
841
842                         in >> name;                             //read from first column
843                         in >> tax;                      //read from second column
844                         
845                         //if this name is in the accnos file
846                         if (names.count(name) != 0) {
847                                 wroteSomething = true;
848                                 
849                                 out << name << '\t' << tax << endl;
850                                 selectedCount++;
851                 
852                 if (m->debug) { sanity["tax"].insert(name); }
853                         }
854                                         
855                         m->gobble(in);
856                 }
857                 in.close();
858                 out.close();
859                 
860                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
861                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
862                 
863                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
864                         
865                 return 0;
866
867         }
868         catch(exception& e) {
869                 m->errorOut(e, "GetSeqsCommand", "readTax");
870                 exit(1);
871         }
872 }
873 //**********************************************************************************************************************
874 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
875 int GetSeqsCommand::readAlign(){
876         try {
877                 string thisOutputDir = outputDir;
878                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
879                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
880                 ofstream out;
881                 m->openOutputFile(outputFileName, out);
882                 
883
884                 ifstream in;
885                 m->openInputFile(alignfile, in);
886                 string name, junk;
887                 
888                 bool wroteSomething = false;
889                 int selectedCount = 0;
890                 
891                 //read column headers
892                 for (int i = 0; i < 16; i++) {  
893                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
894                         else                    {       break;                  }
895                 }
896                 out << endl;
897                 
898                 while(!in.eof()){
899                 
900                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
901
902
903                         in >> name;                             //read from first column
904                         
905                         //if this name is in the accnos file
906                         if (names.count(name) != 0) {
907                                 wroteSomething = true;
908                                 selectedCount++;
909                                 
910                                 out << name << '\t';
911                                 
912                                 //read rest
913                                 for (int i = 0; i < 15; i++) {  
914                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
915                                         else                    {       break;                  }
916                                 }
917                                 out << endl;
918                                 
919                         }else {//still read just don't do anything with it
920                                 //read rest
921                                 for (int i = 0; i < 15; i++) {  
922                                         if (!in.eof())  {       in >> junk;             }
923                                         else                    {       break;                  }
924                                 }
925                         }
926                         
927                         m->gobble(in);
928                 }
929                 in.close();
930                 out.close();
931                 
932                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
933                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
934                 
935                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
936                 
937                 return 0;
938                 
939         }
940         catch(exception& e) {
941                 m->errorOut(e, "GetSeqsCommand", "readAlign");
942                 exit(1);
943         }
944 }
945 //**********************************************************************************************************************
946 //just looking at common mistakes. 
947 int GetSeqsCommand::runSanityCheck(){
948         try {
949         string thisOutputDir = outputDir;
950         if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
951         string filename = outputDir + "get.seqs.debug.report";
952         
953         ofstream out;
954                 m->openOutputFile(filename, out); 
955
956        
957         //compare fasta, name, qual and taxonomy if given to make sure they contain the same seqs
958         if (fastafile != "") {
959             if (namefile != "") { //compare with fasta
960                 if (sanity["fasta"] != sanity["name"]) { //create mismatch file
961                     createMisMatchFile(out, fastafile, namefile, sanity["fasta"], sanity["name"]);
962                 }
963             }
964             if (qualfile != "") {
965                 if (sanity["fasta"] != sanity["qual"]) { //create mismatch file
966                     createMisMatchFile(out, fastafile, qualfile, sanity["fasta"], sanity["qual"]);
967                 }
968             }
969             if (taxfile != "") {
970                 if (sanity["fasta"] != sanity["tax"]) { //create mismatch file
971                     createMisMatchFile(out, fastafile, taxfile, sanity["fasta"], sanity["tax"]);
972                 }
973             }
974         }
975         
976         //compare dupnames, groups and list if given to make sure they match
977         if (namefile != "") {
978             if (groupfile != "") {
979                 if (sanity["dupname"] != sanity["group"]) { //create mismatch file
980                     createMisMatchFile(out, namefile, groupfile, sanity["dupname"], sanity["group"]);
981                 } 
982             }
983             if (listfile != "") {
984                 if (sanity["dupname"] != sanity["list"]) { //create mismatch file
985                     createMisMatchFile(out, namefile, listfile, sanity["dupname"], sanity["list"]);
986                 } 
987             }
988         }else{
989
990             if ((groupfile != "") && (fastafile != "")) {
991                 if (sanity["fasta"] != sanity["group"]) { //create mismatch file
992                     createMisMatchFile(out, fastafile, groupfile, sanity["fasta"], sanity["group"]);
993                 } 
994             }
995         }
996         
997         out.close();
998         
999         if (m->isBlank(filename)) { m->mothurRemove(filename); }
1000         else { m->mothurOut("\n[DEBUG]: " + filename + " contains the file mismatches.\n");outputNames.push_back(filename); outputTypes["debug"].push_back(filename); }
1001         
1002         return 0;
1003     }
1004         catch(exception& e) {
1005                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1006                 exit(1);
1007         }
1008 }
1009 //**********************************************************************************************************************
1010 //just looking at common mistakes. 
1011 int GetSeqsCommand::createMisMatchFile(ofstream& out, string filename1, string filename2, set<string> set1, set<string> set2){
1012         try {
1013         out << "****************************************" << endl << endl;
1014         out << "Names unique to " << filename1 << ":\n";
1015         
1016         //remove names in set1 that are also in set2
1017         for (set<string>::iterator it = set1.begin(); it != set1.end();) {
1018             string name = *it;
1019             
1020             if (set2.count(name) == 0)  { out << name << endl;  } //name unique to set1
1021             else                        { set2.erase(name);     } //you are in both so erase 
1022             set1.erase(it++);
1023         }
1024         
1025         out << "\nNames unique to " << filename2 << ":\n";
1026         //output results
1027         for (set<string>::iterator it = set2.begin(); it != set2.end(); it++) {  out << *it << endl;  }
1028         
1029         out << "****************************************" << endl << endl;
1030         
1031         return 0;
1032     }
1033         catch(exception& e) {
1034                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1035                 exit(1);
1036         }
1037 }
1038 //**********************************************************************************************************************
1039
1040 int GetSeqsCommand::compareAccnos(){
1041         try {
1042                 
1043                 string thisOutputDir = outputDir;
1044                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
1045                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + getOutputFileNameTag("accnosreport");
1046                 ofstream out;
1047                 m->openOutputFile(outputFileName, out);
1048                 
1049                 ifstream in;
1050                 m->openInputFile(accnosfile2, in);
1051                 string name;
1052                 
1053                 set<string> namesAccnos2;
1054                 set<string> namesDups;
1055                 set<string> namesAccnos = names;
1056                 
1057                 map<string, int> nameCount;
1058                 
1059                 if (namefile != "") {
1060                         ifstream inName;
1061                         m->openInputFile(namefile, inName);
1062                         
1063                         
1064                         while(!inName.eof()){
1065                                 
1066                                 if (m->control_pressed) { inName.close(); return 0; }
1067                                 
1068                                 string thisname, repnames;
1069                                 
1070                                 inName >> thisname;             m->gobble(inName);              //read from first column
1071                                 inName >> repnames;                     //read from second column
1072                                 
1073                                 int num = m->getNumNames(repnames);
1074                                 nameCount[thisname] = num;
1075                                 
1076                                 m->gobble(inName);
1077                         }
1078                         inName.close(); 
1079                 }
1080                 
1081                 while(!in.eof()){
1082                         in >> name;
1083                         
1084                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
1085                                 int pos = name.find_last_of('_');
1086                                 string tempName = name;
1087                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
1088                                 if (namesAccnos.count(tempName) == 0){
1089                                         namesAccnos2.insert(name);
1090                                 }else { //you are in both so erase
1091                                         namesAccnos.erase(name);
1092                                         namesDups.insert(name);
1093                                 }
1094                         }else { //you are in both so erase
1095                                 namesAccnos.erase(name);
1096                                 namesDups.insert(name);
1097                         }
1098                         
1099                         m->gobble(in);
1100                 }
1101                 in.close();     
1102                 
1103                 out << "Names in both files : " + toString(namesDups.size()) << endl;
1104                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
1105                 
1106                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
1107                         out << (*it);
1108                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1109                         out << endl;
1110                 }
1111                 
1112                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
1113                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
1114                 
1115                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
1116                         out << (*it);
1117                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1118                         out << endl;
1119                 }
1120                 
1121                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
1122                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
1123                 
1124                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
1125                         out << (*it);
1126                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1127                         out << endl;
1128                 }
1129
1130                 out.close(); 
1131                 
1132                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
1133                 
1134                 return 0;
1135                 
1136         }
1137         catch(exception& e) {
1138                 m->errorOut(e, "GetSeqsCommand", "compareAccnos");
1139                 exit(1);
1140         }
1141 }
1142
1143
1144 //**********************************************************************************************************************
1145