]> git.donarmstrong.com Git - mothur.git/blob - getseqscommand.cpp
added count file to get.oturep, pre.cluster, screen.seqs, tree.shared. made remove...
[mothur.git] / getseqscommand.cpp
1 /*
2  *  getseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/8/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> GetSeqsCommand::setParameters(){ 
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
23                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
24                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
25                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pqfile);
26                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos);
27                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
28                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30                 CommandParameter paccnos2("accnos2", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(paccnos2);
31
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "GetSeqsCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string GetSeqsCommand::getHelpString(){ 
43         try {
44                 string helpString = "";
45                 helpString += "The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, count, list, taxonomy, quality or alignreport file.\n";
46                 helpString += "It outputs a file containing only the sequences in the .accnos file.\n";
47                 helpString += "The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, qfile, alignreport and dups.  You must provide accnos unless you have a valid current accnos file, and at least one of the other parameters.\n";
48                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
49                 helpString += "The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
50                 helpString += "Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "GetSeqsCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59
60 //**********************************************************************************************************************
61 GetSeqsCommand::GetSeqsCommand(){       
62         try {
63                 abort = true; calledHelp = true;
64                 setParameters();
65                 vector<string> tempOutNames;
66                 outputTypes["fasta"] = tempOutNames;
67                 outputTypes["taxonomy"] = tempOutNames;
68                 outputTypes["name"] = tempOutNames;
69                 outputTypes["group"] = tempOutNames;
70                 outputTypes["alignreport"] = tempOutNames;
71                 outputTypes["list"] = tempOutNames;
72                 outputTypes["qfile"] = tempOutNames;
73         outputTypes["count"] = tempOutNames;
74                 outputTypes["accnosreport"] = tempOutNames;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
78                 exit(1);
79         }
80 }
81 //**********************************************************************************************************************
82 string GetSeqsCommand::getOutputFileNameTag(string type, string inputName=""){  
83         try {
84         string outputFileName = "";
85                 map<string, vector<string> >::iterator it;
86         
87         //is this a type this command creates
88         it = outputTypes.find(type);
89         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
90         else {
91             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
92             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
93             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
94             else if (type == "count")       {   outputFileName =  "pick.count_table";   }
95             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
96             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
97             else if (type == "qfile")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
98             else if (type == "accnosreport"){   outputFileName =  "accnos.report";                       }
99             else if (type == "alignreport") {   outputFileName =  "pick.align.report";                   }
100             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
101         }
102         return outputFileName;
103         }
104         catch(exception& e) {
105                 m->errorOut(e, "GetSeqsCommand", "getOutputFileNameTag");
106                 exit(1);
107         }
108 }
109 //**********************************************************************************************************************
110 GetSeqsCommand::GetSeqsCommand(string option)  {
111         try {
112                 abort = false; calledHelp = false;   
113                                 
114                 //allow user to run help
115                 if(option == "help") { help(); abort = true; calledHelp = true; }
116                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117                 
118                 else {
119                         vector<string> myArray = setParameters();
120                         
121                         OptionParser parser(option);
122                         map<string,string> parameters = parser.getParameters();
123                         
124                         ValidParameters validParameter;
125                         map<string,string>::iterator it;
126                         
127                         //check to make sure all parameters are valid for command
128                         for (it = parameters.begin(); it != parameters.end(); it++) { 
129                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
130                         }
131                         
132                         //initialize outputTypes
133                         vector<string> tempOutNames;
134                         outputTypes["fasta"] = tempOutNames;
135                         outputTypes["taxonomy"] = tempOutNames;
136                         outputTypes["name"] = tempOutNames;
137                         outputTypes["group"] = tempOutNames;
138                         outputTypes["alignreport"] = tempOutNames;
139                         outputTypes["list"] = tempOutNames;
140                         outputTypes["qfile"] = tempOutNames;
141                         outputTypes["accnosreport"] = tempOutNames;
142             outputTypes["count"] = tempOutNames;
143                         
144                         //if the user changes the output directory command factory will send this info to us in the output parameter 
145                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
146                         
147                         //if the user changes the input directory command factory will send this info to us in the output parameter 
148                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
149                         if (inputDir == "not found"){   inputDir = "";          }
150                         else {
151                                 string path;
152                                 it = parameters.find("alignreport");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
158                                 }
159                                 
160                                 it = parameters.find("fasta");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
166                                 }
167                                 
168                                 it = parameters.find("accnos");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
174                                 }
175                                 
176                                 it = parameters.find("accnos2");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["accnos2"] = inputDir + it->second;          }
182                                 }
183                                 
184                                 it = parameters.find("list");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
190                                 }
191                                 
192                                 it = parameters.find("name");
193                                 //user has given a template file
194                                 if(it != parameters.end()){ 
195                                         path = m->hasPath(it->second);
196                                         //if the user has not given a path then, add inputdir. else leave path alone.
197                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
198                                 }
199                                 
200                                 it = parameters.find("group");
201                                 //user has given a template file
202                                 if(it != parameters.end()){ 
203                                         path = m->hasPath(it->second);
204                                         //if the user has not given a path then, add inputdir. else leave path alone.
205                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
206                                 }
207                                 
208                                 it = parameters.find("taxonomy");
209                                 //user has given a template file
210                                 if(it != parameters.end()){ 
211                                         path = m->hasPath(it->second);
212                                         //if the user has not given a path then, add inputdir. else leave path alone.
213                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
214                                 }
215                                 
216                                 it = parameters.find("qfile");
217                                 //user has given a template file
218                                 if(it != parameters.end()){ 
219                                         path = m->hasPath(it->second);
220                                         //if the user has not given a path then, add inputdir. else leave path alone.
221                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
222                                 }
223                 
224                 it = parameters.find("count");
225                                 //user has given a template file
226                                 if(it != parameters.end()){ 
227                                         path = m->hasPath(it->second);
228                                         //if the user has not given a path then, add inputdir. else leave path alone.
229                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
230                                 }
231                         }
232
233                         
234                         //check for required parameters
235                         accnosfile = validParameter.validFile(parameters, "accnos", true);
236                         if (accnosfile == "not open") { abort = true; }
237                         else if (accnosfile == "not found") {  
238                                 accnosfile = m->getAccnosFile(); 
239                                 if (accnosfile != "") {  m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
240                                 else { 
241                                         m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine(); 
242                                         abort = true;
243                                 } 
244                         }else { m->setAccnosFile(accnosfile); } 
245                         
246                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
247                         
248                         fastafile = validParameter.validFile(parameters, "fasta", true);
249                         if (fastafile == "not open") { fastafile = ""; abort = true; }
250                         else if (fastafile == "not found") {  fastafile = "";  }
251                         else { m->setFastaFile(fastafile); }
252                         
253                         namefile = validParameter.validFile(parameters, "name", true);
254                         if (namefile == "not open") { namefile = ""; abort = true; }
255                         else if (namefile == "not found") {  namefile = "";  }  
256                         else { m->setNameFile(namefile); }
257                         
258                         groupfile = validParameter.validFile(parameters, "group", true);
259                         if (groupfile == "not open") { abort = true; }
260                         else if (groupfile == "not found") {  groupfile = "";  }        
261                         else { m->setGroupFile(groupfile); }
262                         
263                         alignfile = validParameter.validFile(parameters, "alignreport", true);
264                         if (alignfile == "not open") { abort = true; }
265                         else if (alignfile == "not found") {  alignfile = "";  }
266                         
267                         listfile = validParameter.validFile(parameters, "list", true);
268                         if (listfile == "not open") { abort = true; }
269                         else if (listfile == "not found") {  listfile = "";  }
270                         else { m->setListFile(listfile); }
271                         
272                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
273                         if (taxfile == "not open") { taxfile = ""; abort = true; }
274                         else if (taxfile == "not found") {  taxfile = "";  }
275                         else { m->setTaxonomyFile(taxfile); }
276                         
277                         qualfile = validParameter.validFile(parameters, "qfile", true);
278                         if (qualfile == "not open") { abort = true; }
279                         else if (qualfile == "not found") {  qualfile = "";  }
280                         else { m->setQualFile(qualfile); }
281                         
282                         accnosfile2 = validParameter.validFile(parameters, "accnos2", true);
283                         if (accnosfile2 == "not open") { abort = true; }
284                         else if (accnosfile2 == "not found") {  accnosfile2 = "";  }
285                         
286             countfile = validParameter.validFile(parameters, "count", true);
287             if (countfile == "not open") { countfile = ""; abort = true; }
288             else if (countfile == "not found") { countfile = "";  }     
289             else { m->setCountTableFile(countfile); }
290             
291             if ((namefile != "") && (countfile != "")) {
292                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
293             }
294             
295             if ((groupfile != "") && (countfile != "")) {
296                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
297             }
298
299                         
300                         string usedDups = "true";
301                         string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "true"; usedDups = ""; }
302                         dups = m->isTrue(temp);
303                         
304                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
305             
306             if (countfile == "") {
307                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
308                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
309                     parser.getNameFile(files);
310                 }
311             }
312                 }
313
314         }
315         catch(exception& e) {
316                 m->errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
317                 exit(1);
318         }
319 }
320 //**********************************************************************************************************************
321
322 int GetSeqsCommand::execute(){
323         try {
324                 
325                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
326                 
327                 //get names you want to keep
328                 names = m->readAccnos(accnosfile);
329                 
330                 if (m->control_pressed) { return 0; }
331         
332         if (countfile != "") {
333             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
334                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
335             }
336         }
337                 
338                 //read through the correct file and output lines you want to keep
339                 if (namefile != "")                     {               readName();                     }
340                 if (fastafile != "")            {               readFasta();            }
341                 if (groupfile != "")            {               readGroup();            }
342         if (countfile != "")            {               readCount();            }
343                 if (alignfile != "")            {               readAlign();            }
344                 if (listfile != "")                     {               readList();                     }
345                 if (taxfile != "")                      {               readTax();                      }
346                 if (qualfile != "")                     {               readQual();                     }
347                 if (accnosfile2 != "")          {               compareAccnos();        }
348         
349         if (m->debug) { runSanityCheck(); }
350                 
351                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
352                 
353                 
354                 if (outputNames.size() != 0) {
355                         m->mothurOutEndLine();
356                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
357                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
358                         m->mothurOutEndLine();
359                         
360                         //set fasta file as new current fastafile
361                         string current = "";
362                         itTypes = outputTypes.find("fasta");
363                         if (itTypes != outputTypes.end()) {
364                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
365                         }
366                         
367                         itTypes = outputTypes.find("name");
368                         if (itTypes != outputTypes.end()) {
369                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
370                         }
371                         
372                         itTypes = outputTypes.find("group");
373                         if (itTypes != outputTypes.end()) {
374                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
375                         }
376                         
377                         itTypes = outputTypes.find("list");
378                         if (itTypes != outputTypes.end()) {
379                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
380                         }
381                         
382                         itTypes = outputTypes.find("taxonomy");
383                         if (itTypes != outputTypes.end()) {
384                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
385                         }
386                         
387                         itTypes = outputTypes.find("qfile");
388                         if (itTypes != outputTypes.end()) {
389                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
390                         }
391                         
392             itTypes = outputTypes.find("count");
393                         if (itTypes != outputTypes.end()) {
394                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
395                         }
396                 }
397                 
398                 return 0;               
399         }
400
401         catch(exception& e) {
402                 m->errorOut(e, "GetSeqsCommand", "execute");
403                 exit(1);
404         }
405 }
406
407 //**********************************************************************************************************************
408 int GetSeqsCommand::readFasta(){
409         try {
410                 string thisOutputDir = outputDir;
411                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
412                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
413                 ofstream out;
414                 m->openOutputFile(outputFileName, out);
415                 
416                 
417                 ifstream in;
418                 m->openInputFile(fastafile, in);
419                 string name;
420                 
421                 bool wroteSomething = false;
422                 int selectedCount = 0;
423         
424         if (m->debug) { set<string> temp; sanity["fasta"] = temp; }
425                 
426                 while(!in.eof()){
427                 
428                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
429                         
430                         Sequence currSeq(in);
431                         name = currSeq.getName();
432                         
433                         if (name != "") {
434                                 //if this name is in the accnos file
435                                 if (names.count(name) != 0) {
436                                         wroteSomething = true;
437                                         
438                                         currSeq.printSequence(out);
439                                         selectedCount++;
440                     
441                     if (m->debug) { sanity["fasta"].insert(name); }
442                                 }
443                         }
444                         m->gobble(in);
445                 }
446                 in.close();     
447                 out.close();
448                 
449                 
450                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
451                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName); 
452                 
453                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
454                 
455                 return 0;
456
457         }
458         catch(exception& e) {
459                 m->errorOut(e, "GetSeqsCommand", "readFasta");
460                 exit(1);
461         }
462 }
463 //**********************************************************************************************************************
464 int GetSeqsCommand::readQual(){
465         try {
466                 string thisOutputDir = outputDir;
467                 if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
468                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
469                 ofstream out;
470                 m->openOutputFile(outputFileName, out);
471                 
472                 
473                 ifstream in;
474                 m->openInputFile(qualfile, in);
475                 string name;
476                 
477                 bool wroteSomething = false;
478                 int selectedCount = 0;
479                 
480         if (m->debug) { set<string> temp; sanity["qual"] = temp; }
481                 
482                 while(!in.eof()){       
483                         string saveName = "";
484                         string name = "";
485                         string scores = "";
486                         
487                         in >> name; 
488                                 
489                         if (name.length() != 0) { 
490                                 saveName = name.substr(1);
491                                 while (!in.eof())       {       
492                                         char c = in.get(); 
493                                         if (c == 10 || c == 13){        break;  }
494                                         else { name += c; }     
495                                 } 
496                                 m->gobble(in);
497                         }
498                         
499                         while(in){
500                                 char letter= in.get();
501                                 if(letter == '>'){      in.putback(letter);     break;  }
502                                 else{ scores += letter; }
503                         }
504                         
505                         m->gobble(in);
506                         
507                         if (names.count(saveName) != 0) {
508                                 wroteSomething = true;
509                                                 
510                                 out << name << endl << scores;
511                                 selectedCount++;
512                 if (m->debug) { sanity["qual"].insert(name); }
513                         }
514                         
515                         m->gobble(in);
516                 }
517                 in.close();
518                 out.close();
519                 
520                 
521                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
522                 outputNames.push_back(outputFileName);  outputTypes["qfile"].push_back(outputFileName); 
523                 
524                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your quality file."); m->mothurOutEndLine();
525
526                 
527                 return 0;
528                 
529         }
530         catch(exception& e) {
531                 m->errorOut(e, "GetSeqsCommand", "readQual");
532                 exit(1);
533         }
534 }
535 //**********************************************************************************************************************
536 int GetSeqsCommand::readCount(){
537         try {
538                 string thisOutputDir = outputDir;
539                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
540                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
541                 
542                 ofstream out;
543                 m->openOutputFile(outputFileName, out);
544                 
545                 ifstream in;
546                 m->openInputFile(countfile, in);
547                 
548                 bool wroteSomething = false;
549                 int selectedCount = 0;
550                 
551         string headers = m->getline(in); m->gobble(in);
552         out << headers << endl;
553         
554         string name, rest; int thisTotal;
555         while (!in.eof()) {
556             
557             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
558             
559             in >> name; m->gobble(in); 
560             in >> thisTotal; m->gobble(in);
561             rest = m->getline(in); m->gobble(in);
562             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
563             
564             if (names.count(name) != 0) {
565                 out << name << '\t' << thisTotal << '\t' << rest << endl;
566                 wroteSomething = true;
567                 selectedCount+= thisTotal;
568             }
569         }
570         in.close();
571                 out.close();
572         
573         //check for groups that have been eliminated
574         CountTable ct;
575         if (ct.testGroups(outputFileName)) {
576             ct.readTable(outputFileName);
577             ct.printTable(outputFileName);
578         }
579                 
580                 if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
581                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
582                 
583                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
584         
585                 return 0;
586         }
587         catch(exception& e) {
588                 m->errorOut(e, "GetSeqsCommand", "readCount");
589                 exit(1);
590         }
591 }
592
593 //**********************************************************************************************************************
594 int GetSeqsCommand::readList(){
595         try {
596                 string thisOutputDir = outputDir;
597                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
598                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
599                 ofstream out;
600                 m->openOutputFile(outputFileName, out);
601                 
602                 ifstream in;
603                 m->openInputFile(listfile, in);
604                 
605                 bool wroteSomething = false;
606                 int selectedCount = 0;
607         
608         if (m->debug) { set<string> temp; sanity["list"] = temp; }
609                 
610                 while(!in.eof()){
611                         
612                         selectedCount = 0;
613                         
614                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
615
616                         //read in list vector
617                         ListVector list(in);
618                         
619                         //make a new list vector
620                         ListVector newList;
621                         newList.setLabel(list.getLabel());
622                         
623                         //for each bin
624                         for (int i = 0; i < list.getNumBins(); i++) {
625                         
626                                 //parse out names that are in accnos file
627                                 string binnames = list.get(i);
628                 vector<string> bnames;
629                 m->splitAtComma(binnames, bnames);
630                                 
631                                 string newNames = "";
632                 for (int i = 0; i < bnames.size(); i++) {
633                                         string name = bnames[i];
634                                         //if that name is in the .accnos file, add it
635                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
636                                 }
637                         
638                                 //if there are names in this bin add to new list
639                                 if (newNames != "") { 
640                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
641                                         newList.push_back(newNames);    
642                                 }
643                         }
644                                 
645                         //print new listvector
646                         if (newList.getNumBins() != 0) {
647                                 wroteSomething = true;
648                                 newList.print(out);
649                         }
650                         
651                         m->gobble(in);
652                 }
653                 in.close();     
654                 out.close();
655                 
656                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
657                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
658                 
659                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
660                 
661                 return 0;
662
663         }
664         catch(exception& e) {
665                 m->errorOut(e, "GetSeqsCommand", "readList");
666                 exit(1);
667         }
668 }
669 //**********************************************************************************************************************
670 int GetSeqsCommand::readName(){
671         try {
672                 string thisOutputDir = outputDir;
673                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
674                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
675                 ofstream out;
676                 m->openOutputFile(outputFileName, out);
677                 
678
679                 ifstream in;
680                 m->openInputFile(namefile, in);
681                 string name, firstCol, secondCol;
682                 
683                 bool wroteSomething = false;
684                 int selectedCount = 0;
685         
686         if (m->debug) { set<string> temp; sanity["name"] = temp; }
687         if (m->debug) { set<string> temp; sanity["dupname"] = temp; }
688                 
689                 while(!in.eof()){
690                 
691                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
692
693                         in >> firstCol;                         
694                         in >> secondCol;
695                         
696                         string hold = "";
697                         if (dups) { hold = secondCol; }
698                         
699                         vector<string> parsedNames;
700                         m->splitAtComma(secondCol, parsedNames);
701                         
702                         vector<string> validSecond;
703                         for (int i = 0; i < parsedNames.size(); i++) {
704                                 if (names.count(parsedNames[i]) != 0) {
705                                         validSecond.push_back(parsedNames[i]);
706                     if (m->debug) { sanity["dupname"].insert(parsedNames[i]); }
707                                 }
708                         }
709
710                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
711                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]); if (m->debug) { sanity["dupname"].insert(parsedNames[i]); } }
712                                 out << firstCol << '\t' << hold << endl;
713                                 wroteSomething = true;
714                                 selectedCount += parsedNames.size();
715                 if (m->debug) { sanity["name"].insert(firstCol); }
716                         }else {
717                                 selectedCount += validSecond.size();
718                                 
719                                 //if the name in the first column is in the set then print it and any other names in second column also in set
720                                 if (names.count(firstCol) != 0) {
721                                 
722                                         wroteSomething = true;
723                                         
724                                         out << firstCol << '\t';
725                                         
726                                         //you know you have at least one valid second since first column is valid
727                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
728                                         out << validSecond[validSecond.size()-1] << endl;
729                     
730                     if (m->debug) { sanity["name"].insert(firstCol); }
731                                         
732                                 
733                                 //make first name in set you come to first column and then add the remaining names to second column
734                                 }else {
735                                         //you want part of this row
736                                         if (validSecond.size() != 0) {
737                                         
738                                                 wroteSomething = true;
739                                                 
740                                                 out << validSecond[0] << '\t';
741                                         
742                                                 //you know you have at least one valid second since first column is valid
743                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
744                                                 out << validSecond[validSecond.size()-1] << endl;
745                         
746                         if (m->debug) { sanity["name"].insert(validSecond[0]); }
747                                         }
748                                 }
749                         }
750                         m->gobble(in);
751                 }
752                 in.close();
753                 out.close();
754                 
755                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
756                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
757                 
758                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
759                 
760                 return 0;
761                 
762         }
763         catch(exception& e) {
764                 m->errorOut(e, "GetSeqsCommand", "readName");
765                 exit(1);
766         }
767 }
768
769 //**********************************************************************************************************************
770 int GetSeqsCommand::readGroup(){
771         try {
772                 string thisOutputDir = outputDir;
773                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
774                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
775                 ofstream out;
776                 m->openOutputFile(outputFileName, out);
777                 
778
779                 ifstream in;
780                 m->openInputFile(groupfile, in);
781                 string name, group;
782                 
783                 bool wroteSomething = false;
784                 int selectedCount = 0;
785         
786         if (m->debug) { set<string> temp; sanity["group"] = temp; }
787                 
788                 while(!in.eof()){
789
790                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
791
792
793                         in >> name;                             //read from first column
794                         in >> group;                    //read from second column
795                         
796                         //if this name is in the accnos file
797                         if (names.count(name) != 0) {
798                                 wroteSomething = true;
799                                 
800                                 out << name << '\t' << group << endl;
801                                 selectedCount++;
802                 
803                 if (m->debug) {  sanity["group"].insert(name); }
804                         }
805                                         
806                         m->gobble(in);
807                 }
808                 in.close();
809                 out.close();
810                 
811                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
812                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
813                 
814                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
815
816                 
817                 return 0;
818
819         }
820         catch(exception& e) {
821                 m->errorOut(e, "GetSeqsCommand", "readGroup");
822                 exit(1);
823         }
824 }
825 //**********************************************************************************************************************
826 int GetSeqsCommand::readTax(){
827         try {
828                 string thisOutputDir = outputDir;
829                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
830                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
831                 ofstream out;
832                 m->openOutputFile(outputFileName, out);
833                 
834                 ifstream in;
835                 m->openInputFile(taxfile, in);
836                 string name, tax;
837                 
838                 bool wroteSomething = false;
839                 int selectedCount = 0;
840         
841         if (m->debug) { set<string> temp; sanity["tax"] = temp; }
842                 
843                 while(!in.eof()){
844
845                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
846
847                         in >> name;                             //read from first column
848                         in >> tax;                      //read from second column
849                         
850                         //if this name is in the accnos file
851                         if (names.count(name) != 0) {
852                                 wroteSomething = true;
853                                 
854                                 out << name << '\t' << tax << endl;
855                                 selectedCount++;
856                 
857                 if (m->debug) { sanity["tax"].insert(name); }
858                         }
859                                         
860                         m->gobble(in);
861                 }
862                 in.close();
863                 out.close();
864                 
865                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
866                 outputNames.push_back(outputFileName);  outputTypes["taxonomy"].push_back(outputFileName);
867                 
868                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
869                         
870                 return 0;
871
872         }
873         catch(exception& e) {
874                 m->errorOut(e, "GetSeqsCommand", "readTax");
875                 exit(1);
876         }
877 }
878 //**********************************************************************************************************************
879 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
880 int GetSeqsCommand::readAlign(){
881         try {
882                 string thisOutputDir = outputDir;
883                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
884                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
885                 ofstream out;
886                 m->openOutputFile(outputFileName, out);
887                 
888
889                 ifstream in;
890                 m->openInputFile(alignfile, in);
891                 string name, junk;
892                 
893                 bool wroteSomething = false;
894                 int selectedCount = 0;
895                 
896                 //read column headers
897                 for (int i = 0; i < 16; i++) {  
898                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
899                         else                    {       break;                  }
900                 }
901                 out << endl;
902                 
903                 while(!in.eof()){
904                 
905                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
906
907
908                         in >> name;                             //read from first column
909                         
910                         //if this name is in the accnos file
911                         if (names.count(name) != 0) {
912                                 wroteSomething = true;
913                                 selectedCount++;
914                                 
915                                 out << name << '\t';
916                                 
917                                 //read rest
918                                 for (int i = 0; i < 15; i++) {  
919                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
920                                         else                    {       break;                  }
921                                 }
922                                 out << endl;
923                                 
924                         }else {//still read just don't do anything with it
925                                 //read rest
926                                 for (int i = 0; i < 15; i++) {  
927                                         if (!in.eof())  {       in >> junk;             }
928                                         else                    {       break;                  }
929                                 }
930                         }
931                         
932                         m->gobble(in);
933                 }
934                 in.close();
935                 out.close();
936                 
937                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
938                 outputNames.push_back(outputFileName);  outputTypes["alignreport"].push_back(outputFileName);
939                 
940                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
941                 
942                 return 0;
943                 
944         }
945         catch(exception& e) {
946                 m->errorOut(e, "GetSeqsCommand", "readAlign");
947                 exit(1);
948         }
949 }
950 //**********************************************************************************************************************
951 //just looking at common mistakes. 
952 int GetSeqsCommand::runSanityCheck(){
953         try {
954         string thisOutputDir = outputDir;
955         if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
956         string filename = outputDir + "get.seqs.debug.report";
957         
958         ofstream out;
959                 m->openOutputFile(filename, out); 
960
961        
962         //compare fasta, name, qual and taxonomy if given to make sure they contain the same seqs
963         if (fastafile != "") {
964             if (namefile != "") { //compare with fasta
965                 if (sanity["fasta"] != sanity["name"]) { //create mismatch file
966                     createMisMatchFile(out, fastafile, namefile, sanity["fasta"], sanity["name"]);
967                 }
968             }
969             if (qualfile != "") {
970                 if (sanity["fasta"] != sanity["qual"]) { //create mismatch file
971                     createMisMatchFile(out, fastafile, qualfile, sanity["fasta"], sanity["qual"]);
972                 }
973             }
974             if (taxfile != "") {
975                 if (sanity["fasta"] != sanity["tax"]) { //create mismatch file
976                     createMisMatchFile(out, fastafile, taxfile, sanity["fasta"], sanity["tax"]);
977                 }
978             }
979         }
980         
981         //compare dupnames, groups and list if given to make sure they match
982         if (namefile != "") {
983             if (groupfile != "") {
984                 if (sanity["dupname"] != sanity["group"]) { //create mismatch file
985                     createMisMatchFile(out, namefile, groupfile, sanity["dupname"], sanity["group"]);
986                 } 
987             }
988             if (listfile != "") {
989                 if (sanity["dupname"] != sanity["list"]) { //create mismatch file
990                     createMisMatchFile(out, namefile, listfile, sanity["dupname"], sanity["list"]);
991                 } 
992             }
993         }else{
994
995             if ((groupfile != "") && (fastafile != "")) {
996                 if (sanity["fasta"] != sanity["group"]) { //create mismatch file
997                     createMisMatchFile(out, fastafile, groupfile, sanity["fasta"], sanity["group"]);
998                 } 
999             }
1000         }
1001         
1002         out.close();
1003         
1004         if (m->isBlank(filename)) { m->mothurRemove(filename); }
1005         else { m->mothurOut("\n[DEBUG]: " + filename + " contains the file mismatches.\n");outputNames.push_back(filename); outputTypes["debug"].push_back(filename); }
1006         
1007         return 0;
1008     }
1009         catch(exception& e) {
1010                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1011                 exit(1);
1012         }
1013 }
1014 //**********************************************************************************************************************
1015 //just looking at common mistakes. 
1016 int GetSeqsCommand::createMisMatchFile(ofstream& out, string filename1, string filename2, set<string> set1, set<string> set2){
1017         try {
1018         out << "****************************************" << endl << endl;
1019         out << "Names unique to " << filename1 << ":\n";
1020         
1021         //remove names in set1 that are also in set2
1022         for (set<string>::iterator it = set1.begin(); it != set1.end();) {
1023             string name = *it;
1024             
1025             if (set2.count(name) == 0)  { out << name << endl;  } //name unique to set1
1026             else                        { set2.erase(name);     } //you are in both so erase 
1027             set1.erase(it++);
1028         }
1029         
1030         out << "\nNames unique to " << filename2 << ":\n";
1031         //output results
1032         for (set<string>::iterator it = set2.begin(); it != set2.end(); it++) {  out << *it << endl;  }
1033         
1034         out << "****************************************" << endl << endl;
1035         
1036         return 0;
1037     }
1038         catch(exception& e) {
1039                 m->errorOut(e, "GetSeqsCommand", "runSanityCheck");
1040                 exit(1);
1041         }
1042 }
1043 //**********************************************************************************************************************
1044
1045 int GetSeqsCommand::compareAccnos(){
1046         try {
1047                 
1048                 string thisOutputDir = outputDir;
1049                 if (outputDir == "") {  thisOutputDir += m->hasPath(accnosfile);  }
1050                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(accnosfile)) + getOutputFileNameTag("accnosreport");
1051                 ofstream out;
1052                 m->openOutputFile(outputFileName, out);
1053                 
1054                 ifstream in;
1055                 m->openInputFile(accnosfile2, in);
1056                 string name;
1057                 
1058                 set<string> namesAccnos2;
1059                 set<string> namesDups;
1060                 set<string> namesAccnos = names;
1061                 
1062                 map<string, int> nameCount;
1063                 
1064                 if (namefile != "") {
1065                         ifstream inName;
1066                         m->openInputFile(namefile, inName);
1067                         
1068                         
1069                         while(!inName.eof()){
1070                                 
1071                                 if (m->control_pressed) { inName.close(); return 0; }
1072                                 
1073                                 string thisname, repnames;
1074                                 
1075                                 inName >> thisname;             m->gobble(inName);              //read from first column
1076                                 inName >> repnames;                     //read from second column
1077                                 
1078                                 int num = m->getNumNames(repnames);
1079                                 nameCount[thisname] = num;
1080                                 
1081                                 m->gobble(inName);
1082                         }
1083                         inName.close(); 
1084                 }
1085                 
1086                 while(!in.eof()){
1087                         in >> name;
1088                         
1089                         if (namesAccnos.count(name) == 0){ //name unique to accnos2
1090                                 int pos = name.find_last_of('_');
1091                                 string tempName = name;
1092                                 if (pos != string::npos) {  tempName = tempName.substr(pos+1); cout << tempName << endl; }
1093                                 if (namesAccnos.count(tempName) == 0){
1094                                         namesAccnos2.insert(name);
1095                                 }else { //you are in both so erase
1096                                         namesAccnos.erase(name);
1097                                         namesDups.insert(name);
1098                                 }
1099                         }else { //you are in both so erase
1100                                 namesAccnos.erase(name);
1101                                 namesDups.insert(name);
1102                         }
1103                         
1104                         m->gobble(in);
1105                 }
1106                 in.close();     
1107                 
1108                 out << "Names in both files : " + toString(namesDups.size()) << endl;
1109                 m->mothurOut("Names in both files : " + toString(namesDups.size())); m->mothurOutEndLine();
1110                 
1111                 for (set<string>::iterator it = namesDups.begin(); it != namesDups.end(); it++) {
1112                         out << (*it);
1113                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1114                         out << endl;
1115                 }
1116                 
1117                 out << "Names unique to " + accnosfile + " : " + toString(namesAccnos.size()) << endl;
1118                 m->mothurOut("Names unique to " + accnosfile + " : " + toString(namesAccnos.size())); m->mothurOutEndLine();
1119                 
1120                 for (set<string>::iterator it = namesAccnos.begin(); it != namesAccnos.end(); it++) {
1121                         out << (*it);
1122                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1123                         out << endl;
1124                 }
1125                 
1126                 out << "Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size()) << endl;
1127                 m->mothurOut("Names unique to " + accnosfile2 + " : " + toString(namesAccnos2.size())); m->mothurOutEndLine();
1128                 
1129                 for (set<string>::iterator it = namesAccnos2.begin(); it != namesAccnos2.end(); it++) {
1130                         out << (*it);
1131                         if (namefile != "") { out << '\t' << nameCount[(*it)]; }
1132                         out << endl;
1133                 }
1134
1135                 out.close(); 
1136                 
1137                 outputNames.push_back(outputFileName);  outputTypes["accnosreport"].push_back(outputFileName);
1138                 
1139                 return 0;
1140                 
1141         }
1142         catch(exception& e) {
1143                 m->errorOut(e, "GetSeqsCommand", "compareAccnos");
1144                 exit(1);
1145         }
1146 }
1147
1148
1149 //**********************************************************************************************************************
1150