]> git.donarmstrong.com Git - mothur.git/blob - getlineagecommand.cpp
b895cc4fc62bfb49346c96df7a0d6371dd2b4a50
[mothur.git] / getlineagecommand.cpp
1 /*
2  *  getlineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getlineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> GetLineageCommand::setParameters(){      
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false, true); parameters.push_back(pfasta);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false, true); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false, true); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false, true); parameters.push_back(pgroup);
22                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist);
23                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,true, true); parameters.push_back(ptaxonomy);
24                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
25                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true, true); parameters.push_back(ptaxon);
26                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "GetLineageCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string GetLineageCommand::getHelpString(){      
41         try {
42                 string helpString = "";
43                 helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
44                 helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
45                 helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
46                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47                 helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
48                 helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
49                 helpString += "If they belong to the taxonomy and have confidences below those you provide the sequence will not be selected.\n";
50                 helpString += "The get.lineage command should be in the following format: get.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
51                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
52                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
53                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
54                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
55                 return helpString;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "GetLineageCommand", "getHelpString");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63 string GetLineageCommand::getOutputPattern(string type) {
64     try {
65         string pattern = "";
66         
67         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
68         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
69         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
70         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
71         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
72         else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
73         else if (type == "alignreport")      {   pattern = "[filename],pick.align.report";    }
74         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
75         
76         return pattern;
77     }
78     catch(exception& e) {
79         m->errorOut(e, "GetLineageCommand", "getOutputPattern");
80         exit(1);
81     }
82 }
83 //**********************************************************************************************************************
84 GetLineageCommand::GetLineageCommand(){ 
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["fasta"] = tempOutNames;
90                 outputTypes["taxonomy"] = tempOutNames;
91                 outputTypes["name"] = tempOutNames;
92                 outputTypes["group"] = tempOutNames;
93                 outputTypes["alignreport"] = tempOutNames;
94                 outputTypes["list"] = tempOutNames;
95         outputTypes["count"] = tempOutNames;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103 GetLineageCommand::GetLineageCommand(string option)  {
104         try {
105                 abort = false; calledHelp = false;   
106                                 
107                 //allow user to run help
108                 if(option == "help") { help(); abort = true; calledHelp = true; }
109                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110                 
111                 else {
112                         vector<string> myArray = setParameters();
113                         
114                         OptionParser parser(option);
115                         map<string,string> parameters = parser.getParameters();
116                         
117                         ValidParameters validParameter;
118                         map<string,string>::iterator it;
119                         
120                         //check to make sure all parameters are valid for command
121                         for (it = parameters.begin(); it != parameters.end(); it++) { 
122                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
123                         }
124                         
125                         //initialize outputTypes
126                         vector<string> tempOutNames;
127                         outputTypes["fasta"] = tempOutNames;
128                         outputTypes["taxonomy"] = tempOutNames;
129                         outputTypes["name"] = tempOutNames;
130                         outputTypes["group"] = tempOutNames;
131                         outputTypes["alignreport"] = tempOutNames;
132                         outputTypes["list"] = tempOutNames;
133             outputTypes["count"] = tempOutNames;
134
135                         //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
137                         
138                         //if the user changes the input directory command factory will send this info to us in the output parameter 
139                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
140                         if (inputDir == "not found"){   inputDir = "";          }
141                         else {
142                                 string path;
143                                 it = parameters.find("alignreport");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
149                                 }
150                                 
151                                 it = parameters.find("fasta");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
157                                 }
158                                 
159                                 it = parameters.find("list");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
165                                 }
166                                 
167                                 it = parameters.find("name");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
173                                 }
174                                 
175                                 it = parameters.find("group");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
181                                 }
182                                 
183                                 it = parameters.find("taxonomy");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
189                                 }
190                 
191                 it = parameters.find("count");
192                                 //user has given a template file
193                                 if(it != parameters.end()){ 
194                                         path = m->hasPath(it->second);
195                                         //if the user has not given a path then, add inputdir. else leave path alone.
196                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
197                                 }
198                         }
199
200                         
201                         //check for required parameters                 
202                         fastafile = validParameter.validFile(parameters, "fasta", true);
203                         if (fastafile == "not open") { fastafile = ""; abort = true; }
204                         else if (fastafile == "not found") {  fastafile = "";  }
205                         else { m->setFastaFile(fastafile); }
206                         
207                         namefile = validParameter.validFile(parameters, "name", true);
208                         if (namefile == "not open") { namefile = ""; abort = true; }
209                         else if (namefile == "not found") {  namefile = "";  }  
210                         else { m->setNameFile(namefile); }
211                         
212                         groupfile = validParameter.validFile(parameters, "group", true);
213                         if (groupfile == "not open") { abort = true; }
214                         else if (groupfile == "not found") {  groupfile = "";  }        
215                         else { m->setGroupFile(groupfile); }
216                         
217                         alignfile = validParameter.validFile(parameters, "alignreport", true);
218                         if (alignfile == "not open") { abort = true; }
219                         else if (alignfile == "not found") {  alignfile = "";  }
220                         
221                         listfile = validParameter.validFile(parameters, "list", true);
222                         if (listfile == "not open") { abort = true; }
223                         else if (listfile == "not found") {  listfile = "";  }
224                         else { m->setListFile(listfile); }
225                         
226                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
227                         if (taxfile == "not open") { taxfile = ""; abort = true; }
228                         else if (taxfile == "not found") {                              
229                                 taxfile = m->getTaxonomyFile(); 
230                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
231                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
232                         }else { m->setTaxonomyFile(taxfile); }
233                         
234                         string usedDups = "true";
235                         string temp = validParameter.validFile(parameters, "dups", false);      
236                         if (temp == "not found") { 
237                                 if (namefile != "") {  temp = "true";                                   }
238                                 else                            {  temp = "false"; usedDups = "";       }
239                         }
240                         dups = m->isTrue(temp);
241             
242             countfile = validParameter.validFile(parameters, "count", true);
243             if (countfile == "not open") { countfile = ""; abort = true; }
244             else if (countfile == "not found") { countfile = "";  }     
245             else { m->setCountTableFile(countfile); }
246             
247             if ((namefile != "") && (countfile != "")) {
248                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
249             }
250             
251             if ((groupfile != "") && (countfile != "")) {
252                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
253             }
254                         
255                         taxons = validParameter.validFile(parameters, "taxon", false);  
256                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
257                         else { 
258                                 //rip off quotes
259                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
260                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
261                         }
262                         m->splitAtChar(taxons, listOfTaxons, '-');
263                         
264                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
265                 
266             if (countfile == "") {
267                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
268                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
269                     parser.getNameFile(files);
270                 }
271             }
272                 }
273
274         }
275         catch(exception& e) {
276                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
277                 exit(1);
278         }
279 }
280 //**********************************************************************************************************************
281
282 int GetLineageCommand::execute(){
283         try {
284                 
285                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
286                 
287                 if (m->control_pressed) { return 0; }
288         
289         if (countfile != "") {
290             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
291                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
292             }
293         }
294                 
295                 //read through the correct file and output lines you want to keep
296                 if (taxfile != "")                      {               readTax();              }  //fills the set of names to get
297                 if (namefile != "")                     {               readName();             }
298                 if (fastafile != "")            {               readFasta();    }
299         if (countfile != "")            {               readCount();    }
300                 if (groupfile != "")            {               readGroup();    }
301                 if (alignfile != "")            {               readAlign();    }
302                 if (listfile != "")                     {               readList();             }
303                 
304                 
305                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
306                 
307                 if (outputNames.size() != 0) {
308                         m->mothurOutEndLine();
309                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
310                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
311                         m->mothurOutEndLine();
312                         
313                         //set fasta file as new current fastafile
314                         string current = "";
315                         itTypes = outputTypes.find("fasta");
316                         if (itTypes != outputTypes.end()) {
317                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
318                         }
319                         
320                         itTypes = outputTypes.find("name");
321                         if (itTypes != outputTypes.end()) {
322                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
323                         }
324                         
325                         itTypes = outputTypes.find("group");
326                         if (itTypes != outputTypes.end()) {
327                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
328                         }
329                         
330                         itTypes = outputTypes.find("list");
331                         if (itTypes != outputTypes.end()) {
332                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
333                         }
334                         
335                         itTypes = outputTypes.find("taxonomy");
336                         if (itTypes != outputTypes.end()) {
337                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
338                         }
339                         
340             itTypes = outputTypes.find("count");
341                         if (itTypes != outputTypes.end()) {
342                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
343                         }
344                 }
345                 
346                 return 0;               
347         }
348
349         catch(exception& e) {
350                 m->errorOut(e, "GetLineageCommand", "execute");
351                 exit(1);
352         }
353 }
354
355 //**********************************************************************************************************************
356 int GetLineageCommand::readFasta(){
357         try {
358                 string thisOutputDir = outputDir;
359                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
360                 map<string, string> variables; 
361         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
362         variables["[extension]"] = m->getExtension(fastafile);
363                 string outputFileName = getOutputFileName("fasta", variables);
364                 ofstream out;
365                 m->openOutputFile(outputFileName, out);
366                 
367                 
368                 ifstream in;
369                 m->openInputFile(fastafile, in);
370                 string name;
371                 
372                 bool wroteSomething = false;
373                 
374                 while(!in.eof()){
375                 
376                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
377                         
378                         Sequence currSeq(in);
379                         name = currSeq.getName();
380                         
381                         if (name != "") {
382                                 //if this name is in the accnos file
383                                 if (names.count(name) != 0) {
384                                         wroteSomething = true;
385                                         
386                                         currSeq.printSequence(out);
387                                 }
388                         }
389                         m->gobble(in);
390                 }
391                 in.close();     
392                 out.close();
393                 
394                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
395                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
396                 
397                 return 0;
398
399         }
400         catch(exception& e) {
401                 m->errorOut(e, "GetLineageCommand", "readFasta");
402                 exit(1);
403         }
404 }
405 //**********************************************************************************************************************
406 int GetLineageCommand::readCount(){
407         try {
408                 string thisOutputDir = outputDir;
409                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
410                 map<string, string> variables; 
411                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
412         variables["[extension]"] = m->getExtension(countfile);
413                 string outputFileName = getOutputFileName("count", variables);
414                 
415                 ofstream out;
416                 m->openOutputFile(outputFileName, out);
417                 
418                 ifstream in;
419                 m->openInputFile(countfile, in);
420                 
421                 bool wroteSomething = false;
422                 
423         string headers = m->getline(in); m->gobble(in);
424         out << headers << endl;
425         
426         string name, rest; int thisTotal;
427         while (!in.eof()) {
428             
429             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
430             
431             in >> name; m->gobble(in); 
432             in >> thisTotal; m->gobble(in);
433             rest = m->getline(in); m->gobble(in);
434             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
435             
436             if (names.count(name) != 0) {
437                 out << name << '\t' << thisTotal << '\t' << rest << endl;
438                 wroteSomething = true;
439             }
440         }
441         in.close();
442                 out.close();
443         
444         //check for groups that have been eliminated
445         CountTable ct;
446         if (ct.testGroups(outputFileName)) {
447             ct.readTable(outputFileName, true);
448             ct.printTable(outputFileName);
449         }
450
451                 
452                 if (wroteSomething == false) {  m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
453                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
454                        
455                 return 0;
456         }
457         catch(exception& e) {
458                 m->errorOut(e, "GetLineageCommand", "readCount");
459                 exit(1);
460         }
461 }
462 //**********************************************************************************************************************
463 int GetLineageCommand::readList(){
464         try {
465                 string thisOutputDir = outputDir;
466                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
467                 map<string, string> variables; 
468         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
469         variables["[extension]"] = m->getExtension(listfile);
470                 string outputFileName = getOutputFileName("list", variables);
471                 ofstream out;
472                 m->openOutputFile(outputFileName, out);
473                 
474                 ifstream in;
475                 m->openInputFile(listfile, in);
476                 
477                 bool wroteSomething = false;
478                 
479                 while(!in.eof()){
480                         
481                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
482
483                         //read in list vector
484                         ListVector list(in);
485                         
486                         //make a new list vector
487                         ListVector newList;
488                         newList.setLabel(list.getLabel());
489                         
490                         //for each bin
491                         for (int i = 0; i < list.getNumBins(); i++) {
492                         
493                                 //parse out names that are in accnos file
494                                 string binnames = list.get(i);
495                                 
496                                 string newNames = "";
497                                 while (binnames.find_first_of(',') != -1) { 
498                                         string name = binnames.substr(0,binnames.find_first_of(','));
499                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
500                                         
501                                         //if that name is in the .accnos file, add it
502                                         if (names.count(name) != 0) {  newNames += name + ",";  }
503                                 }
504                         
505                                 //get last name
506                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  }
507
508                                 //if there are names in this bin add to new list
509                                 if (newNames != "") { 
510                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
511                                         newList.push_back(newNames);    
512                                 }
513                         }
514                                 
515                         //print new listvector
516                         if (newList.getNumBins() != 0) {
517                                 wroteSomething = true;
518                                 newList.print(out);
519                         }
520                         
521                         m->gobble(in);
522                 }
523                 in.close();     
524                 out.close();
525                 
526                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
527                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
528                 
529                 return 0;
530
531         }
532         catch(exception& e) {
533                 m->errorOut(e, "GetLineageCommand", "readList");
534                 exit(1);
535         }
536 }
537 //**********************************************************************************************************************
538 int GetLineageCommand::readName(){
539         try {
540                 string thisOutputDir = outputDir;
541                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
542                 map<string, string> variables; 
543                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
544         variables["[extension]"] = m->getExtension(namefile);
545                 string outputFileName = getOutputFileName("name", variables);
546                 ofstream out;
547                 m->openOutputFile(outputFileName, out);
548                 
549
550                 ifstream in;
551                 m->openInputFile(namefile, in);
552                 string name, firstCol, secondCol;
553                 
554                 bool wroteSomething = false;
555                 
556                 
557                 while(!in.eof()){
558                 
559                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
560
561                         in >> firstCol;                         
562                         in >> secondCol;
563                         
564                         string hold = "";
565                         if (dups) { hold = secondCol; }
566                         
567                         vector<string> parsedNames;
568                         m->splitAtComma(secondCol, parsedNames);
569                         
570                         vector<string> validSecond;
571                         for (int i = 0; i < parsedNames.size(); i++) {
572                                 if (names.count(parsedNames[i]) != 0) {
573                                         validSecond.push_back(parsedNames[i]);
574                                 }
575                         }
576
577                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
578                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
579                                 out << firstCol << '\t' << hold << endl;
580                                 wroteSomething = true;
581                         }else {
582                                 //if the name in the first column is in the set then print it and any other names in second column also in set
583                                 if (names.count(firstCol) != 0) {
584                                 
585                                         wroteSomething = true;
586                                         
587                                         out << firstCol << '\t';
588                                         
589                                         //you know you have at least one valid second since first column is valid
590                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
591                                         out << validSecond[validSecond.size()-1] << endl;
592                                         
593                                 
594                                 //make first name in set you come to first column and then add the remaining names to second column
595                                 }else {
596                                         //you want part of this row
597                                         if (validSecond.size() != 0) {
598                                         
599                                                 wroteSomething = true;
600                                                 
601                                                 out << validSecond[0] << '\t';
602                                         
603                                                 //you know you have at least one valid second since first column is valid
604                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
605                                                 out << validSecond[validSecond.size()-1] << endl;
606                                         }
607                                 }
608                         }
609                         m->gobble(in);
610                 }
611                 in.close();
612                 out.close();
613                 
614                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
615                 outputNames.push_back(outputFileName);  outputTypes["name"].push_back(outputFileName);
616                 
617                 return 0;
618                 
619         }
620         catch(exception& e) {
621                 m->errorOut(e, "GetLineageCommand", "readName");
622                 exit(1);
623         }
624 }
625
626 //**********************************************************************************************************************
627 int GetLineageCommand::readGroup(){
628         try {
629                 string thisOutputDir = outputDir;
630                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
631                 map<string, string> variables; 
632                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
633         variables["[extension]"] = m->getExtension(groupfile);
634                 string outputFileName = getOutputFileName("group", variables);
635                 ofstream out;
636                 m->openOutputFile(outputFileName, out);
637                 
638
639                 ifstream in;
640                 m->openInputFile(groupfile, in);
641                 string name, group;
642                 
643                 bool wroteSomething = false;
644                 
645                 while(!in.eof()){
646
647                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
648
649
650                         in >> name;                             //read from first column
651                         in >> group;                    //read from second column
652                         
653                         //if this name is in the accnos file
654                         if (names.count(name) != 0) {
655                                 wroteSomething = true;
656                                 
657                                 out << name << '\t' << group << endl;
658                         }
659                                         
660                         m->gobble(in);
661                 }
662                 in.close();
663                 out.close();
664                 
665                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
666                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
667                 
668                 return 0;
669
670         }
671         catch(exception& e) {
672                 m->errorOut(e, "GetLineageCommand", "readGroup");
673                 exit(1);
674         }
675 }
676 //**********************************************************************************************************************
677 int GetLineageCommand::readTax(){
678         try {
679                 string thisOutputDir = outputDir;
680                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
681                 map<string, string> variables; 
682                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
683         variables["[extension]"] = m->getExtension(taxfile);
684                 string outputFileName = getOutputFileName("taxonomy", variables);
685                 ofstream out;
686                 m->openOutputFile(outputFileName, out);
687                 
688                 ifstream in;
689                 m->openInputFile(taxfile, in);
690                 string name, tax;
691                 
692                 //bool wroteSomething = false;
693                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
694                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
695                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
696                 
697                 for (int i = 0; i < listOfTaxons.size(); i++) {
698                         noConfidenceTaxons[i] = listOfTaxons[i];
699                         int hasConPos = listOfTaxons[i].find_first_of('(');
700                         if (hasConPos != string::npos) {  
701                                 taxonsHasConfidence[i] = true; 
702                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
703                                 noConfidenceTaxons[i] = listOfTaxons[i];
704                                 m->removeConfidences(noConfidenceTaxons[i]);
705                         }
706                 }
707                 
708                 
709                 while(!in.eof()){
710
711                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
712
713                         in >> name;                             //read from first column
714                         in >> tax;                      //read from second column
715                         
716             string noQuotesTax = m->removeQuotes(tax);
717             
718                         for (int j = 0; j < listOfTaxons.size(); j++) {
719                                                         
720                                 string newtax = noQuotesTax;
721                         
722                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
723                                 if (!taxonsHasConfidence[j]) {
724                                         int hasConfidences = noQuotesTax.find_first_of('(');
725                                         if (hasConfidences != string::npos) { 
726                                                 newtax = noQuotesTax;
727                                                 m->removeConfidences(newtax);
728                                         }
729                                 
730                                         int pos = newtax.find(noConfidenceTaxons[j]);
731                                 
732                                         if (pos != string::npos) { //this sequence contains the taxon the user wants
733                                                 names.insert(name); 
734                                                 out << name << '\t' << tax << endl;
735                                                 //since you belong to at least one of the taxons we want you are included so no need to search for other
736                                                 break;
737                                         }
738                                 }else{//if listOfTaxons[i] has them and you don't them remove taxons
739                                         int hasConfidences = noQuotesTax.find_first_of('(');
740                                         if (hasConfidences == string::npos) { 
741                                         
742                                                 int pos = newtax.find(noConfidenceTaxons[j]);
743                                         
744                                                 if (pos != string::npos) { //this sequence contains the taxon the user wants
745                                                         names.insert(name);
746                                                         out << name << '\t' << tax << endl;
747                                                         //since you belong to at least one of the taxons we want you are included so no need to search for other
748                                                         break;
749                                                 }
750                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
751                                                 //first remove confidences from both and see if the taxonomy exists
752                                         
753                                                 string noNewTax = noQuotesTax;
754                                                 int hasConfidences = noQuotesTax.find_first_of('(');
755                                                 if (hasConfidences != string::npos) { 
756                                                         noNewTax = noQuotesTax;
757                                                         m->removeConfidences(noNewTax);
758                                                 }
759                                         
760                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
761                                         
762                                                 if (pos != string::npos) { //if yes, then are the confidences okay
763                                                 
764                                                         bool good = true;
765                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
766                                                 
767                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
768                                                         //we want to "line them up", so we will find the the index where the searchstring starts
769                                                         int index = 0;
770                                                         for (int i = 0; i < usersTaxon.size(); i++) {
771                                                         
772                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
773                                                                         index = i;  
774                                                                         int spot = 0;
775                                                                         bool goodspot = true;
776                                                                         //is this really the start, or are we dealing with a taxon of the same name?
777                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
778                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
779                                                                                 else { spot++; }
780                                                                         }
781                                                                 
782                                                                         if (goodspot) { break; }
783                                                                 }
784                                                         }
785                                                 
786                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
787                                                         
788                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
789                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
790                                                                                 good = false;
791                                                                                 break;
792                                                                         }
793                                                                 }else {
794                                                                         good = false;
795                                                                         break;
796                                                                 }
797                                                         }
798                                                 
799                                                         //passed the test so add you
800                                                         if (good) {
801                                                                 names.insert(name);
802                                                                 out << name << '\t' << tax << endl;
803                                                                 break;
804                                                         }
805                                                 }
806                                         }
807                                 }
808                         
809                         }
810                         
811                         m->gobble(in);
812                 }
813                 in.close();
814                 out.close();
815                 
816                 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
817                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
818                         
819                 return 0;
820
821         }
822         catch(exception& e) {
823                 m->errorOut(e, "GetLineageCommand", "readTax");
824                 exit(1);
825         }
826 }
827 /**************************************************************************************************/
828 vector< map<string, float> > GetLineageCommand::getTaxons(string tax) {
829         try {
830         
831                 vector< map<string, float> > t;
832                 string taxon = "";
833                 int taxLength = tax.length();
834         
835                 for(int i=0;i<taxLength;i++){
836                         if(tax[i] == ';'){
837                 
838                                 int openParen = taxon.find_last_of('(');
839                                 int closeParen = taxon.find_last_of(')');
840                                 
841                                 string newtaxon, confidence;
842                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
843                     string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
844                     if (m->isNumeric1(confidenceScore)) {  //its a confidence
845                         newtaxon = taxon.substr(0, openParen); //rip off confidence
846                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
847                     }else { //its part of the taxon
848                         newtaxon = taxon;
849                         confidence = "0";
850                     }
851                                 }else{
852                                         newtaxon = taxon;
853                                         confidence = "0";
854                                 } 
855                                 float con = 0;
856                                 convert(confidence, con);
857                                 
858                                 map<string, float> temp;
859                                 temp[newtaxon] = con;
860                 
861                                 t.push_back(temp);
862                                 taxon = "";
863                         }
864                         else{
865                                 taxon += tax[i];
866                 
867                         }
868                 }
869                 
870                 return t;
871         }
872         catch(exception& e) {
873                 m->errorOut(e, "GetLineageCommand", "getTaxons");
874                 exit(1);
875         }
876 }
877 //**********************************************************************************************************************
878 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
879 int GetLineageCommand::readAlign(){
880         try {
881                 string thisOutputDir = outputDir;
882                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
883         map<string, string> variables; 
884                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
885         variables["[extension]"] = m->getExtension(alignfile);
886                 string outputFileName = getOutputFileName("alignreport", variables);
887                 
888                 ofstream out;
889                 m->openOutputFile(outputFileName, out);
890                 
891
892                 ifstream in;
893                 m->openInputFile(alignfile, in);
894                 string name, junk;
895                 
896                 bool wroteSomething = false;
897                 
898                 //read column headers
899                 for (int i = 0; i < 16; i++) {  
900                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
901                         else                    {       break;                  }
902                 }
903                 out << endl;
904                 
905                 while(!in.eof()){
906                 
907                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
908
909
910                         in >> name;                             //read from first column
911                         
912                         //if this name is in the accnos file
913                         if (names.count(name) != 0) {
914                                 wroteSomething = true;
915                                 
916                                 out << name << '\t';
917                                 
918                                 //read rest
919                                 for (int i = 0; i < 15; i++) {  
920                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
921                                         else                    {       break;                  }
922                                 }
923                                 out << endl;
924                                 
925                         }else {//still read just don't do anything with it
926                                 //read rest
927                                 for (int i = 0; i < 15; i++) {  
928                                         if (!in.eof())  {       in >> junk;             }
929                                         else                    {       break;                  }
930                                 }
931                         }
932                         
933                         m->gobble(in);
934                 }
935                 in.close();
936                 out.close();
937                 
938                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
939                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
940                 
941                 return 0;
942                 
943         }
944         catch(exception& e) {
945                 m->errorOut(e, "GetLineageCommand", "readAlign");
946                 exit(1);
947         }
948 }
949 //**********************************************************************************************************************
950
951
952
953