]> git.donarmstrong.com Git - mothur.git/blob - getlineagecommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / getlineagecommand.cpp
1 /*
2  *  getlineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getlineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14 #include "inputdata.h"
15
16 //**********************************************************************************************************************
17 vector<string> GetLineageCommand::setParameters(){      
18         try {
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false, true); parameters.push_back(pfasta);
20         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false, true); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false, true); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false, true); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist);
24         CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
25                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","taxonomy",false,false, true); parameters.push_back(ptaxonomy);
26         CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy);
27                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
28         CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
29                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true, true); parameters.push_back(ptaxon);
30                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
31                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
32                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33                 
34                 vector<string> myArray;
35                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
36                 return myArray;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "GetLineageCommand", "setParameters");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 string GetLineageCommand::getHelpString(){      
45         try {
46                 string helpString = "";
47                 helpString += "The get.lineage command reads a taxonomy or constaxonomy file and any of the following file types: fasta, name, group, count, list, shared or alignreport file. The constaxonomy can only be used with a shared or list file.\n";
48                 helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
49                 helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, shared, taxonomy, alignreport, label and dups.  You must provide taxonomy or constaxonomy unless you have a valid current taxonomy file.\n";
50                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
51                 helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
52                 helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
53                 helpString += "If they belong to the taxonomy and have confidences below those you provide the sequence will not be selected.\n";
54          helpString += "The label parameter is used to analyze specific labels in your input. \n";
55                 helpString += "The get.lineage command should be in the following format: get.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
56                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
57                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
58                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "GetLineageCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string GetLineageCommand::getOutputPattern(string type) {
69     try {
70         string pattern = "";
71         
72         if (type == "fasta")                {   pattern = "[filename],pick,[extension]";    }
73         else if (type == "taxonomy")        {   pattern = "[filename],pick,[extension]";    }
74         else if (type == "constaxonomy")    {   pattern = "[filename],pick,[extension]";    }
75         else if (type == "name")            {   pattern = "[filename],pick,[extension]";    }
76         else if (type == "group")           {   pattern = "[filename],pick,[extension]";    }
77         else if (type == "count")           {   pattern = "[filename],pick,[extension]";    }
78         else if (type == "list")            {   pattern = "[filename],pick,[extension]-[filename],[distance],pick,[extension]";    }
79         else if (type == "shared")          {   pattern = "[filename],[distance],pick,[extension]";    }
80         else if (type == "alignreport")     {   pattern = "[filename],pick.align.report";    }
81         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
82         
83         return pattern;
84     }
85     catch(exception& e) {
86         m->errorOut(e, "GetLineageCommand", "getOutputPattern");
87         exit(1);
88     }
89 }
90 //**********************************************************************************************************************
91 GetLineageCommand::GetLineageCommand(){ 
92         try {
93                 abort = true; calledHelp = true; 
94                 setParameters();
95                 vector<string> tempOutNames;
96                 outputTypes["fasta"] = tempOutNames;
97                 outputTypes["taxonomy"] = tempOutNames;
98                 outputTypes["name"] = tempOutNames;
99                 outputTypes["group"] = tempOutNames;
100                 outputTypes["alignreport"] = tempOutNames;
101                 outputTypes["list"] = tempOutNames;
102         outputTypes["count"] = tempOutNames;
103         outputTypes["constaxonomy"] = tempOutNames;
104         outputTypes["shared"] = tempOutNames;
105         }
106         catch(exception& e) {
107                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
108                 exit(1);
109         }
110 }
111 //**********************************************************************************************************************
112 GetLineageCommand::GetLineageCommand(string option)  {
113         try {
114                 abort = false; calledHelp = false;   
115                                 
116                 //allow user to run help
117                 if(option == "help") { help(); abort = true; calledHelp = true; }
118                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
119                 
120                 else {
121                         vector<string> myArray = setParameters();
122                         
123                         OptionParser parser(option);
124                         map<string,string> parameters = parser.getParameters();
125                         
126                         ValidParameters validParameter;
127                         map<string,string>::iterator it;
128                         
129                         //check to make sure all parameters are valid for command
130                         for (it = parameters.begin(); it != parameters.end(); it++) { 
131                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
132                         }
133                         
134                         //initialize outputTypes
135                         vector<string> tempOutNames;
136                         outputTypes["fasta"] = tempOutNames;
137                         outputTypes["taxonomy"] = tempOutNames;
138                         outputTypes["name"] = tempOutNames;
139                         outputTypes["group"] = tempOutNames;
140                         outputTypes["alignreport"] = tempOutNames;
141                         outputTypes["list"] = tempOutNames;
142             outputTypes["count"] = tempOutNames;
143             outputTypes["constaxonomy"] = tempOutNames;
144             outputTypes["shared"] = tempOutNames;
145
146                         //if the user changes the output directory command factory will send this info to us in the output parameter 
147                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
148                         
149                         //if the user changes the input directory command factory will send this info to us in the output parameter 
150                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
151                         if (inputDir == "not found"){   inputDir = "";          }
152                         else {
153                                 string path;
154                                 it = parameters.find("alignreport");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
160                                 }
161                                 
162                                 it = parameters.find("fasta");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
168                                 }
169                                 
170                                 it = parameters.find("list");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
176                                 }
177                                 
178                                 it = parameters.find("name");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
184                                 }
185                                 
186                                 it = parameters.find("group");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
192                                 }
193                                 
194                                 it = parameters.find("taxonomy");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
200                                 }
201                 
202                 it = parameters.find("count");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
208                                 }
209                 
210                 it = parameters.find("constaxonomy");
211                                 //user has given a template file
212                                 if(it != parameters.end()){
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
216                                 }
217                 
218                 it = parameters.find("shared");
219                                 //user has given a template file
220                                 if(it != parameters.end()){
221                                         path = m->hasPath(it->second);
222                                         //if the user has not given a path then, add inputdir. else leave path alone.
223                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
224                                 }
225                         }
226
227                         
228                         //check for required parameters                 
229                         fastafile = validParameter.validFile(parameters, "fasta", true);
230                         if (fastafile == "not open") { fastafile = ""; abort = true; }
231                         else if (fastafile == "not found") {  fastafile = "";  }
232                         else { m->setFastaFile(fastafile); }
233                         
234                         namefile = validParameter.validFile(parameters, "name", true);
235                         if (namefile == "not open") { namefile = ""; abort = true; }
236                         else if (namefile == "not found") {  namefile = "";  }  
237                         else { m->setNameFile(namefile); }
238                         
239                         groupfile = validParameter.validFile(parameters, "group", true);
240                         if (groupfile == "not open") { abort = true; }
241                         else if (groupfile == "not found") {  groupfile = "";  }        
242                         else { m->setGroupFile(groupfile); }
243                         
244                         alignfile = validParameter.validFile(parameters, "alignreport", true);
245                         if (alignfile == "not open") { abort = true; }
246                         else if (alignfile == "not found") {  alignfile = "";  }
247                         
248                         listfile = validParameter.validFile(parameters, "list", true);
249                         if (listfile == "not open") { abort = true; }
250                         else if (listfile == "not found") {  listfile = "";  }
251                         else { m->setListFile(listfile); }
252                         
253                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
254                         if (taxfile == "not open") { taxfile = ""; abort = true; }
255                         else if (taxfile == "not found") {  taxfile = "";               }
256                         else { m->setTaxonomyFile(taxfile); }
257             
258             sharedfile = validParameter.validFile(parameters, "shared", true);
259                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
260                         else if (sharedfile == "not found") {  sharedfile = "";         }
261                         else { m->setSharedFile(sharedfile); }
262
263             
264             constaxonomy = validParameter.validFile(parameters, "constaxonomy", true);
265                         if (constaxonomy == "not open") { constaxonomy = ""; abort = true; }
266                         else if (constaxonomy == "not found") {  constaxonomy = "";             }
267     
268             if ((constaxonomy == "") && (taxfile == "")) {
269                 taxfile = m->getTaxonomyFile();
270                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
271                 else {
272                     m->mothurOut("You have no current taxonomy file and did not provide a constaxonomy file. The taxonomy or constaxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
273                         }
274             
275                         string usedDups = "true";
276                         string temp = validParameter.validFile(parameters, "dups", false);      
277                         if (temp == "not found") { 
278                                 if (namefile != "") {  temp = "true";                                   }
279                                 else                            {  temp = "false"; usedDups = "";       }
280                         }
281                         dups = m->isTrue(temp);
282             
283             countfile = validParameter.validFile(parameters, "count", true);
284             if (countfile == "not open") { countfile = ""; abort = true; }
285             else if (countfile == "not found") { countfile = "";  }     
286             else { m->setCountTableFile(countfile); }
287             
288             if ((namefile != "") && (countfile != "")) {
289                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
290             }
291             
292             if ((groupfile != "") && (countfile != "")) {
293                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
294             }
295                         
296                         taxons = validParameter.validFile(parameters, "taxon", false);  
297                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
298                         else { 
299                                 //rip off quotes
300                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
301                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
302                         }
303                         m->splitAtChar(taxons, listOfTaxons, '-');
304                         
305                         if ((fastafile == "") && (constaxonomy == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, constaxonomy, shared or listfile."); m->mothurOutEndLine(); abort = true; }
306             
307             if ((constaxonomy != "") && ((fastafile != "") || (namefile != "") || (groupfile != "") || (alignfile != "") || (taxfile != "") || (countfile != ""))) {
308                 m->mothurOut("[ERROR]: can only use constaxonomy file with a list or shared file, aborting.\n");  abort = true;
309             }
310             
311             if ((constaxonomy != "") && (taxfile != "")) {
312                 m->mothurOut("[ERROR]: Choose only one: taxonomy or constaxonomy, aborting.\n"); abort = true;
313             }
314             
315             if ((sharedfile != "") && (taxfile != "")) {
316                 m->mothurOut("[ERROR]: sharedfile can only be used with constaxonomy file, aborting.\n"); abort = true;
317             }
318             
319             if ((sharedfile != "") || (listfile != "")) {
320                 label = validParameter.validFile(parameters, "label", false);
321                 if (label == "not found") { label = ""; m->mothurOut("[WARNING]: You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); }
322             }
323             
324             if (countfile == "") {
325                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
326                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
327                     parser.getNameFile(files);
328                 }
329             }
330                 }
331
332         }
333         catch(exception& e) {
334                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
335                 exit(1);
336         }
337 }
338 //**********************************************************************************************************************
339
340 int GetLineageCommand::execute(){
341         try {
342                 
343                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
344                 
345                 if (m->control_pressed) { return 0; }
346         
347         if (countfile != "") {
348             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
349                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
350             }
351         }
352                 
353                 //read through the correct file and output lines you want to keep
354                 if (taxfile != "")                      {
355             readTax(); //fills the set of names to get
356             if (namefile != "")                 {               readName();             }
357             if (fastafile != "")                {               readFasta();    }
358             if (countfile != "")                {               readCount();    }
359             if (groupfile != "")                {               readGroup();    }
360             if (alignfile != "")                {               readAlign();    }
361             if (listfile != "")                 {               readList();             }
362
363         }else {
364             readConsTax();
365             if (listfile != "")                 {               readConsList();         }
366             if (sharedfile != "")               {               readShared();           }
367         }
368                                 
369                 
370                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
371                 
372                 if (outputNames.size() != 0) {
373                         m->mothurOutEndLine();
374                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
375                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
376                         m->mothurOutEndLine();
377                         
378                         //set fasta file as new current fastafile
379                         string current = "";
380                         itTypes = outputTypes.find("fasta");
381                         if (itTypes != outputTypes.end()) {
382                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
383                         }
384                         
385                         itTypes = outputTypes.find("name");
386                         if (itTypes != outputTypes.end()) {
387                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
388                         }
389                         
390                         itTypes = outputTypes.find("group");
391                         if (itTypes != outputTypes.end()) {
392                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
393                         }
394                         
395                         itTypes = outputTypes.find("list");
396                         if (itTypes != outputTypes.end()) {
397                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
398                         }
399             
400             itTypes = outputTypes.find("shared");
401                         if (itTypes != outputTypes.end()) {
402                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
403                         }
404                         
405                         itTypes = outputTypes.find("taxonomy");
406                         if (itTypes != outputTypes.end()) {
407                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
408                         }
409                         
410             itTypes = outputTypes.find("count");
411                         if (itTypes != outputTypes.end()) {
412                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
413                         }
414                 }
415                 
416                 return 0;               
417         }
418
419         catch(exception& e) {
420                 m->errorOut(e, "GetLineageCommand", "execute");
421                 exit(1);
422         }
423 }
424
425 //**********************************************************************************************************************
426 int GetLineageCommand::readFasta(){
427         try {
428                 string thisOutputDir = outputDir;
429                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
430                 map<string, string> variables; 
431         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
432         variables["[extension]"] = m->getExtension(fastafile);
433                 string outputFileName = getOutputFileName("fasta", variables);
434                 ofstream out;
435                 m->openOutputFile(outputFileName, out);
436                 
437                 
438                 ifstream in;
439                 m->openInputFile(fastafile, in);
440                 string name;
441                 
442                 bool wroteSomething = false;
443                 
444                 while(!in.eof()){
445                 
446                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
447                         
448                         Sequence currSeq(in);
449                         name = currSeq.getName();
450                         
451                         if (name != "") {
452                                 //if this name is in the accnos file
453                                 if (names.count(name) != 0) {
454                                         wroteSomething = true;
455                                         
456                                         currSeq.printSequence(out);
457                                 }
458                         }
459                         m->gobble(in);
460                 }
461                 in.close();     
462                 out.close();
463                 
464                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
465                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
466                 
467                 return 0;
468
469         }
470         catch(exception& e) {
471                 m->errorOut(e, "GetLineageCommand", "readFasta");
472                 exit(1);
473         }
474 }
475 //**********************************************************************************************************************
476 int GetLineageCommand::readCount(){
477         try {
478                 string thisOutputDir = outputDir;
479                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
480                 map<string, string> variables; 
481                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
482         variables["[extension]"] = m->getExtension(countfile);
483                 string outputFileName = getOutputFileName("count", variables);
484                 
485                 ofstream out;
486                 m->openOutputFile(outputFileName, out);
487                 
488                 ifstream in;
489                 m->openInputFile(countfile, in);
490                 
491                 bool wroteSomething = false;
492                 
493         string headers = m->getline(in); m->gobble(in);
494         out << headers << endl;
495         
496         string name, rest; int thisTotal;
497         while (!in.eof()) {
498             
499             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
500             
501             in >> name; m->gobble(in); 
502             in >> thisTotal; m->gobble(in);
503             rest = m->getline(in); m->gobble(in);
504             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
505             
506             if (names.count(name) != 0) {
507                 out << name << '\t' << thisTotal << '\t' << rest << endl;
508                 wroteSomething = true;
509             }
510         }
511         in.close();
512                 out.close();
513         
514         //check for groups that have been eliminated
515         CountTable ct;
516         if (ct.testGroups(outputFileName)) {
517             ct.readTable(outputFileName, true);
518             ct.printTable(outputFileName);
519         }
520
521                 
522                 if (wroteSomething == false) {  m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
523                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
524                        
525                 return 0;
526         }
527         catch(exception& e) {
528                 m->errorOut(e, "GetLineageCommand", "readCount");
529                 exit(1);
530         }
531 }
532 //**********************************************************************************************************************
533 int GetLineageCommand::readList(){
534         try {
535                 string thisOutputDir = outputDir;
536                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
537                 map<string, string> variables; 
538         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
539         variables["[extension]"] = m->getExtension(listfile);
540                 string outputFileName = getOutputFileName("list", variables);
541                 ofstream out;
542                 m->openOutputFile(outputFileName, out);
543                 
544                 ifstream in;
545                 m->openInputFile(listfile, in);
546                 
547                 bool wroteSomething = false;
548                 
549                 while(!in.eof()){
550                         
551                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
552
553                         //read in list vector
554                         ListVector list(in);
555                         
556                         //make a new list vector
557                         ListVector newList;
558                         newList.setLabel(list.getLabel());
559                         
560                         //for each bin
561                         for (int i = 0; i < list.getNumBins(); i++) {
562                         
563                                 //parse out names that are in accnos file
564                                 string binnames = list.get(i);
565                 vector<string> bnames;
566                 m->splitAtComma(binnames, bnames);
567                                 
568                                 string newNames = "";
569                 for (int j = 0; j < bnames.size(); j++) {
570                                         string name = bnames[j];
571                                         //if that name is in the .accnos file, add it
572                                         if (names.count(name) != 0) {  newNames += name + ",";  }
573                                 }
574
575
576                                 //if there are names in this bin add to new list
577                                 if (newNames != "") { 
578                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
579                                         newList.push_back(newNames);    
580                                 }
581                         }
582                                 
583                         //print new listvector
584                         if (newList.getNumBins() != 0) {
585                                 wroteSomething = true;
586                                 newList.print(out);
587                         }
588                         
589                         m->gobble(in);
590                 }
591                 in.close();     
592                 out.close();
593                 
594                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
595                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
596                 
597                 return 0;
598
599         }
600         catch(exception& e) {
601                 m->errorOut(e, "GetLineageCommand", "readList");
602                 exit(1);
603         }
604 }
605 //**********************************************************************************************************************
606 int GetLineageCommand::readConsList(){
607         try {
608                 getListVector();
609         
610         if (m->control_pressed) { delete list; return 0;}
611         
612         ListVector newList;
613         newList.setLabel(list->getLabel());
614         int selectedCount = 0;
615         bool wroteSomething = false;
616         string snumBins = toString(list->getNumBins());
617         
618         for (int i = 0; i < list->getNumBins(); i++) {
619             
620             if (m->control_pressed) { delete list; return 0;}
621             
622             //create a label for this otu
623             string otuLabel = "Otu";
624             string sbinNumber = toString(i+1);
625             if (sbinNumber.length() < snumBins.length()) {
626                 int diff = snumBins.length() - sbinNumber.length();
627                 for (int h = 0; h < diff; h++) { otuLabel += "0"; }
628             }
629             otuLabel += sbinNumber;
630             
631             if (names.count(otuLabel) != 0) {
632                                 selectedCount++;
633                 newList.push_back(list->get(i));
634             }
635         }
636         
637         string thisOutputDir = outputDir;
638                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
639         map<string, string> variables;
640                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
641         variables["[extension]"] = m->getExtension(listfile);
642         variables["[distance]"] = list->getLabel();
643                 string outputFileName = getOutputFileName("list", variables);
644                 ofstream out;
645                 m->openOutputFile(outputFileName, out);
646         
647                 delete list;
648         //print new listvector
649         if (newList.getNumBins() != 0) {
650             wroteSomething = true;
651             newList.print(out);
652         }
653                 out.close();
654                 
655                 if (wroteSomething == false) { m->mothurOut("Your file does not contain OTUs from " + taxons + "."); m->mothurOutEndLine();  }
656                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
657                 
658                 m->mothurOut("Selected " + toString(selectedCount) + " OTUs from your list file."); m->mothurOutEndLine();
659         
660                 return 0;
661         
662         }
663         catch(exception& e) {
664                 m->errorOut(e, "GetLineageCommand", "readConsList");
665                 exit(1);
666         }
667 }
668 //**********************************************************************************************************************
669 int GetLineageCommand::getListVector(){
670         try {
671                 InputData input(listfile, "list");
672                 list = input.getListVector();
673                 string lastLabel = list->getLabel();
674                 
675                 if (label == "") { label = lastLabel;  return 0; }
676                 
677                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
678                 set<string> labels; labels.insert(label);
679                 set<string> processedLabels;
680                 set<string> userLabels = labels;
681                 
682                 //as long as you are not at the end of the file or done wih the lines you want
683                 while((list != NULL) && (userLabels.size() != 0)) {
684                         if (m->control_pressed) {  return 0;  }
685                         
686                         if(labels.count(list->getLabel()) == 1){
687                                 processedLabels.insert(list->getLabel());
688                                 userLabels.erase(list->getLabel());
689                                 break;
690                         }
691                         
692                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
693                                 string saveLabel = list->getLabel();
694                                 
695                                 delete list;
696                                 list = input.getListVector(lastLabel);
697                                 
698                                 processedLabels.insert(list->getLabel());
699                                 userLabels.erase(list->getLabel());
700                                 
701                                 //restore real lastlabel to save below
702                                 list->setLabel(saveLabel);
703                                 break;
704                         }
705                         
706                         lastLabel = list->getLabel();
707                         
708                         //get next line to process
709                         //prevent memory leak
710                         delete list;
711                         list = input.getListVector();
712                 }
713                 
714                 
715                 if (m->control_pressed) {  return 0;  }
716                 
717                 //output error messages about any remaining user labels
718                 set<string>::iterator it;
719                 bool needToRun = false;
720                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
721                         m->mothurOut("Your file does not include the label " + *it);
722                         if (processedLabels.count(lastLabel) != 1) {
723                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
724                                 needToRun = true;
725                         }else {
726                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
727                         }
728                 }
729                 
730                 //run last label if you need to
731                 if (needToRun == true)  {
732                         delete list;
733                         list = input.getListVector(lastLabel);
734                 }
735                 
736                 return 0;
737         }
738         catch(exception& e) {
739                 m->errorOut(e, "GetLineageCommand", "getListVector");
740                 exit(1);
741         }
742 }
743
744 //**********************************************************************************************************************
745 int GetLineageCommand::readShared(){
746         try {
747         
748         getShared();
749         
750         if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
751         
752         vector<string> newLabels;
753         
754         //create new "filtered" lookup
755         vector<SharedRAbundVector*> newLookup;
756         for (int i = 0; i < lookup.size(); i++) {
757             SharedRAbundVector* temp = new SharedRAbundVector();
758                         temp->setLabel(lookup[i]->getLabel());
759                         temp->setGroup(lookup[i]->getGroup());
760                         newLookup.push_back(temp);
761         }
762         
763         bool wroteSomething = false;
764         int numSelected = 0;
765         for (int i = 0; i < lookup[0]->getNumBins(); i++) {
766             
767             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
768             
769             //is this otu on the list
770             if (names.count(m->currentBinLabels[i]) != 0) {
771                 numSelected++; wroteSomething = true;
772                 newLabels.push_back(m->currentBinLabels[i]);
773                 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
774                     newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
775                 }
776             }
777         }
778         
779         string thisOutputDir = outputDir;
780                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
781         map<string, string> variables;
782                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
783         variables["[extension]"] = m->getExtension(sharedfile);
784         variables["[distance]"] = lookup[0]->getLabel();
785                 string outputFileName = getOutputFileName("shared", variables);
786         ofstream out;
787                 m->openOutputFile(outputFileName, out);
788                 outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
789         
790                 for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
791         
792         m->currentBinLabels = newLabels;
793         
794                 newLookup[0]->printHeaders(out);
795                 
796                 for (int i = 0; i < newLookup.size(); i++) {
797                         out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t';
798                         newLookup[i]->print(out);
799                 }
800                 out.close();
801         
802         for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
803         
804         if (wroteSomething == false) { m->mothurOut("Your file does not contain OTUs from " + taxons + "."); m->mothurOutEndLine();  }
805         
806                 m->mothurOut("Selected " + toString(numSelected) + " OTUs from your shared file."); m->mothurOutEndLine();
807         
808         return 0;
809     }
810         catch(exception& e) {
811                 m->errorOut(e, "GetLineageCommand", "readShared");
812                 exit(1);
813         }
814 }
815 //**********************************************************************************************************************
816 int GetLineageCommand::getShared(){
817         try {
818                 InputData input(sharedfile, "sharedfile");
819                 lookup = input.getSharedRAbundVectors();
820                 string lastLabel = lookup[0]->getLabel();
821                 
822                 if (label == "") { label = lastLabel;  return 0; }
823                 
824                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
825                 set<string> labels; labels.insert(label);
826                 set<string> processedLabels;
827                 set<string> userLabels = labels;
828                 
829                 //as long as you are not at the end of the file or done wih the lines you want
830                 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
831                         if (m->control_pressed) {   return 0;  }
832                         
833                         if(labels.count(lookup[0]->getLabel()) == 1){
834                                 processedLabels.insert(lookup[0]->getLabel());
835                                 userLabels.erase(lookup[0]->getLabel());
836                                 break;
837                         }
838                         
839                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
840                                 string saveLabel = lookup[0]->getLabel();
841                                 
842                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
843                                 lookup = input.getSharedRAbundVectors(lastLabel);
844                                 
845                                 processedLabels.insert(lookup[0]->getLabel());
846                                 userLabels.erase(lookup[0]->getLabel());
847                                 
848                                 //restore real lastlabel to save below
849                                 lookup[0]->setLabel(saveLabel);
850                                 break;
851                         }
852                         
853                         lastLabel = lookup[0]->getLabel();
854                         
855                         //get next line to process
856                         //prevent memory leak
857                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
858                         lookup = input.getSharedRAbundVectors();
859                 }
860                 
861                 
862                 if (m->control_pressed) {  return 0;  }
863                 
864                 //output error messages about any remaining user labels
865                 set<string>::iterator it;
866                 bool needToRun = false;
867                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
868                         m->mothurOut("Your file does not include the label " + *it);
869                         if (processedLabels.count(lastLabel) != 1) {
870                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
871                                 needToRun = true;
872                         }else {
873                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
874                         }
875                 }
876                 
877                 //run last label if you need to
878                 if (needToRun == true)  {
879                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } }
880                         lookup = input.getSharedRAbundVectors(lastLabel);
881                 }
882                 
883                 return 0;
884         }
885         catch(exception& e) {
886                 m->errorOut(e, "GetLineageCommand", "getShared");
887                 exit(1);
888         }
889 }
890
891 //**********************************************************************************************************************
892 int GetLineageCommand::readName(){
893         try {
894                 string thisOutputDir = outputDir;
895                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
896                 map<string, string> variables; 
897                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
898         variables["[extension]"] = m->getExtension(namefile);
899                 string outputFileName = getOutputFileName("name", variables);
900                 ofstream out;
901                 m->openOutputFile(outputFileName, out);
902                 
903
904                 ifstream in;
905                 m->openInputFile(namefile, in);
906                 string name, firstCol, secondCol;
907                 
908                 bool wroteSomething = false;
909                 
910                 
911                 while(!in.eof()){
912                 
913                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
914
915                         in >> firstCol;                         
916                         in >> secondCol;
917                         
918                         string hold = "";
919                         if (dups) { hold = secondCol; }
920                         
921                         vector<string> parsedNames;
922                         m->splitAtComma(secondCol, parsedNames);
923                         
924                         vector<string> validSecond;
925                         for (int i = 0; i < parsedNames.size(); i++) {
926                                 if (names.count(parsedNames[i]) != 0) {
927                                         validSecond.push_back(parsedNames[i]);
928                                 }
929                         }
930
931                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
932                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
933                                 out << firstCol << '\t' << hold << endl;
934                                 wroteSomething = true;
935                         }else {
936                                 //if the name in the first column is in the set then print it and any other names in second column also in set
937                                 if (names.count(firstCol) != 0) {
938                                 
939                                         wroteSomething = true;
940                                         
941                                         out << firstCol << '\t';
942                                         
943                                         //you know you have at least one valid second since first column is valid
944                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
945                                         out << validSecond[validSecond.size()-1] << endl;
946                                         
947                                 
948                                 //make first name in set you come to first column and then add the remaining names to second column
949                                 }else {
950                                         //you want part of this row
951                                         if (validSecond.size() != 0) {
952                                         
953                                                 wroteSomething = true;
954                                                 
955                                                 out << validSecond[0] << '\t';
956                                         
957                                                 //you know you have at least one valid second since first column is valid
958                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
959                                                 out << validSecond[validSecond.size()-1] << endl;
960                                         }
961                                 }
962                         }
963                         m->gobble(in);
964                 }
965                 in.close();
966                 out.close();
967                 
968                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
969                 outputNames.push_back(outputFileName);  outputTypes["name"].push_back(outputFileName);
970                 
971                 return 0;
972                 
973         }
974         catch(exception& e) {
975                 m->errorOut(e, "GetLineageCommand", "readName");
976                 exit(1);
977         }
978 }
979
980 //**********************************************************************************************************************
981 int GetLineageCommand::readGroup(){
982         try {
983                 string thisOutputDir = outputDir;
984                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
985                 map<string, string> variables; 
986                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
987         variables["[extension]"] = m->getExtension(groupfile);
988                 string outputFileName = getOutputFileName("group", variables);
989                 ofstream out;
990                 m->openOutputFile(outputFileName, out);
991                 
992
993                 ifstream in;
994                 m->openInputFile(groupfile, in);
995                 string name, group;
996                 
997                 bool wroteSomething = false;
998                 
999                 while(!in.eof()){
1000
1001                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1002
1003
1004                         in >> name;                             //read from first column
1005                         in >> group;                    //read from second column
1006                         
1007                         //if this name is in the accnos file
1008                         if (names.count(name) != 0) {
1009                                 wroteSomething = true;
1010                                 
1011                                 out << name << '\t' << group << endl;
1012                         }
1013                                         
1014                         m->gobble(in);
1015                 }
1016                 in.close();
1017                 out.close();
1018                 
1019                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
1020                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
1021                 
1022                 return 0;
1023
1024         }
1025         catch(exception& e) {
1026                 m->errorOut(e, "GetLineageCommand", "readGroup");
1027                 exit(1);
1028         }
1029 }
1030 //**********************************************************************************************************************
1031 int GetLineageCommand::readTax(){
1032         try {
1033                 string thisOutputDir = outputDir;
1034                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1035                 map<string, string> variables; 
1036                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1037         variables["[extension]"] = m->getExtension(taxfile);
1038                 string outputFileName = getOutputFileName("taxonomy", variables);
1039                 ofstream out;
1040                 m->openOutputFile(outputFileName, out);
1041                 
1042                 ifstream in;
1043                 m->openInputFile(taxfile, in);
1044                 string name, tax;
1045                 
1046                 //bool wroteSomething = false;
1047                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1048                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1049                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1050                 
1051                 for (int i = 0; i < listOfTaxons.size(); i++) {
1052                         noConfidenceTaxons[i] = listOfTaxons[i];
1053                         int hasConPos = listOfTaxons[i].find_first_of('(');
1054                         if (hasConPos != string::npos) {  
1055                                 taxonsHasConfidence[i] = true; 
1056                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
1057                                 noConfidenceTaxons[i] = listOfTaxons[i];
1058                                 m->removeConfidences(noConfidenceTaxons[i]);
1059                         }
1060                 }
1061                 
1062                 
1063                 while(!in.eof()){
1064
1065                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1066
1067                         in >> name;                             //read from first column
1068                         in >> tax;                      //read from second column
1069                         
1070             string noQuotesTax = m->removeQuotes(tax);
1071             
1072                         for (int j = 0; j < listOfTaxons.size(); j++) {
1073                                                         
1074                                 string newtax = noQuotesTax;
1075                         
1076                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1077                                 if (!taxonsHasConfidence[j]) {
1078                                         int hasConfidences = noQuotesTax.find_first_of('(');
1079                                         if (hasConfidences != string::npos) { 
1080                                                 newtax = noQuotesTax;
1081                                                 m->removeConfidences(newtax);
1082                                         }
1083                                 
1084                                         int pos = newtax.find(noConfidenceTaxons[j]);
1085                                 
1086                                         if (pos != string::npos) { //this sequence contains the taxon the user wants
1087                                                 names.insert(name); 
1088                                                 out << name << '\t' << tax << endl;
1089                                                 //since you belong to at least one of the taxons we want you are included so no need to search for other
1090                                                 break;
1091                                         }
1092                                 }else{//if listOfTaxons[i] has them and you don't them remove taxons
1093                                         int hasConfidences = noQuotesTax.find_first_of('(');
1094                                         if (hasConfidences == string::npos) { 
1095                                         
1096                                                 int pos = newtax.find(noConfidenceTaxons[j]);
1097                                         
1098                                                 if (pos != string::npos) { //this sequence contains the taxon the user wants
1099                                                         names.insert(name);
1100                                                         out << name << '\t' << tax << endl;
1101                                                         //since you belong to at least one of the taxons we want you are included so no need to search for other
1102                                                         break;
1103                                                 }
1104                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1105                                                 //first remove confidences from both and see if the taxonomy exists
1106                                         
1107                                                 string noNewTax = noQuotesTax;
1108                                                 int hasConfidences = noQuotesTax.find_first_of('(');
1109                                                 if (hasConfidences != string::npos) { 
1110                                                         noNewTax = noQuotesTax;
1111                                                         m->removeConfidences(noNewTax);
1112                                                 }
1113                                         
1114                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
1115                                         
1116                                                 if (pos != string::npos) { //if yes, then are the confidences okay
1117                                                 
1118                                                         bool good = true;
1119                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
1120                                                 
1121                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1122                                                         //we want to "line them up", so we will find the the index where the searchstring starts
1123                                                         int index = 0;
1124                                                         for (int i = 0; i < usersTaxon.size(); i++) {
1125                                                         
1126                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
1127                                                                         index = i;  
1128                                                                         int spot = 0;
1129                                                                         bool goodspot = true;
1130                                                                         //is this really the start, or are we dealing with a taxon of the same name?
1131                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1132                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1133                                                                                 else { spot++; }
1134                                                                         }
1135                                                                 
1136                                                                         if (goodspot) { break; }
1137                                                                 }
1138                                                         }
1139                                                 
1140                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
1141                                                         
1142                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1143                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1144                                                                                 good = false;
1145                                                                                 break;
1146                                                                         }
1147                                                                 }else {
1148                                                                         good = false;
1149                                                                         break;
1150                                                                 }
1151                                                         }
1152                                                 
1153                                                         //passed the test so add you
1154                                                         if (good) {
1155                                                                 names.insert(name);
1156                                                                 out << name << '\t' << tax << endl;
1157                                                                 break;
1158                                                         }
1159                                                 }
1160                                         }
1161                                 }
1162                         
1163                         }
1164                         
1165                         m->gobble(in);
1166                 }
1167                 in.close();
1168                 out.close();
1169                 
1170                 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
1171                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
1172                         
1173                 return 0;
1174
1175         }
1176         catch(exception& e) {
1177                 m->errorOut(e, "GetLineageCommand", "readTax");
1178                 exit(1);
1179         }
1180 }
1181 //**********************************************************************************************************************
1182 int GetLineageCommand::readConsTax(){
1183         try {
1184                 string thisOutputDir = outputDir;
1185                 if (outputDir == "") {  thisOutputDir += m->hasPath(constaxonomy);  }
1186                 map<string, string> variables;
1187                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomy));
1188         variables["[extension]"] = m->getExtension(constaxonomy);
1189                 string outputFileName = getOutputFileName("constaxonomy", variables);
1190                 ofstream out;
1191                 m->openOutputFile(outputFileName, out);
1192                 
1193                 ifstream in;
1194                 m->openInputFile(constaxonomy, in);
1195                 string otuLabel, tax;
1196         int numReps;
1197         
1198         //read headers
1199         string headers = m->getline(in);
1200         out << headers << endl;
1201                 
1202                 //bool wroteSomething = false;
1203                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1204                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1205                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1206                 
1207                 for (int i = 0; i < listOfTaxons.size(); i++) {
1208                         noConfidenceTaxons[i] = listOfTaxons[i];
1209                         int hasConPos = listOfTaxons[i].find_first_of('(');
1210                         if (hasConPos != string::npos) {
1211                                 taxonsHasConfidence[i] = true;
1212                                 searchTaxons[i] = getTaxons(listOfTaxons[i]);
1213                                 noConfidenceTaxons[i] = listOfTaxons[i];
1214                                 m->removeConfidences(noConfidenceTaxons[i]);
1215                         }
1216                 }
1217                 
1218         
1219                 while(!in.eof()){
1220             
1221                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1222             
1223                         in >> otuLabel;                 m->gobble(in);
1224             in >> numReps;          m->gobble(in);
1225                         in >> tax;              m->gobble(in);
1226                         
1227             string noQuotesTax = m->removeQuotes(tax);
1228             
1229                         for (int j = 0; j < listOfTaxons.size(); j++) {
1230                 
1231                                 string newtax = noQuotesTax;
1232                 
1233                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1234                                 if (!taxonsHasConfidence[j]) {
1235                                         int hasConfidences = noQuotesTax.find_first_of('(');
1236                                         if (hasConfidences != string::npos) {
1237                                                 newtax = noQuotesTax;
1238                                                 m->removeConfidences(newtax);
1239                                         }
1240                     
1241                                         int pos = newtax.find(noConfidenceTaxons[j]);
1242                     
1243                                         if (pos != string::npos) { //this sequence contains the taxon the user wants
1244                                                 names.insert(otuLabel);
1245                                                 out << otuLabel << '\t' << numReps << '\t' << tax << endl;
1246                                                 //since you belong to at least one of the taxons we want you are included so no need to search for other
1247                                                 break;
1248                                         }
1249                                 }else{//if listOfTaxons[i] has them and you don't them remove taxons
1250                                         int hasConfidences = noQuotesTax.find_first_of('(');
1251                                         if (hasConfidences == string::npos) {
1252                         
1253                                                 int pos = newtax.find(noConfidenceTaxons[j]);
1254                         
1255                                                 if (pos != string::npos) { //this sequence contains the taxon the user wants
1256                                                         names.insert(otuLabel);
1257                                                         out << otuLabel << '\t' << numReps << '\t' << tax << endl;
1258                                                         //since you belong to at least one of the taxons we want you are included so no need to search for other
1259                                                         break;
1260                                                 }
1261                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1262                                                 //first remove confidences from both and see if the taxonomy exists
1263                         
1264                                                 string noNewTax = noQuotesTax;
1265                                                 int hasConfidences = noQuotesTax.find_first_of('(');
1266                                                 if (hasConfidences != string::npos) {
1267                                                         noNewTax = noQuotesTax;
1268                                                         m->removeConfidences(noNewTax);
1269                                                 }
1270                         
1271                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
1272                         
1273                                                 if (pos != string::npos) { //if yes, then are the confidences okay
1274                             
1275                                                         bool good = true;
1276                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
1277                             
1278                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1279                                                         //we want to "line them up", so we will find the the index where the searchstring starts
1280                                                         int index = 0;
1281                                                         for (int i = 0; i < usersTaxon.size(); i++) {
1282                                 
1283                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
1284                                                                         index = i;
1285                                                                         int spot = 0;
1286                                                                         bool goodspot = true;
1287                                                                         //is this really the start, or are we dealing with a taxon of the same name?
1288                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1289                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1290                                                                                 else { spot++; }
1291                                                                         }
1292                                     
1293                                                                         if (goodspot) { break; }
1294                                                                 }
1295                                                         }
1296                             
1297                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
1298                                 
1299                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1300                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1301                                                                                 good = false;
1302                                                                                 break;
1303                                                                         }
1304                                                                 }else {
1305                                                                         good = false;
1306                                                                         break;
1307                                                                 }
1308                                                         }
1309                             
1310                                                         //passed the test so add you
1311                                                         if (good) {
1312                                                                 names.insert(otuLabel);
1313                                                                 out << otuLabel << '\t' << numReps << '\t' << tax << endl;
1314                                                                 break;
1315                                                         }
1316                                                 }
1317                                         }
1318                                 }
1319                 
1320                         }
1321                 }
1322                 in.close();
1323                 out.close();
1324                 
1325                 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any OTUs from " + taxons + "."); m->mothurOutEndLine();  }
1326                 outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName);
1327         
1328                 return 0;
1329         
1330         }
1331         catch(exception& e) {
1332                 m->errorOut(e, "GetLineageCommand", "readConsTax");
1333                 exit(1);
1334         }
1335 }
1336 /**************************************************************************************************/
1337 vector< map<string, float> > GetLineageCommand::getTaxons(string tax) {
1338         try {
1339         
1340                 vector< map<string, float> > t;
1341                 string taxon = "";
1342                 int taxLength = tax.length();
1343         
1344                 for(int i=0;i<taxLength;i++){
1345                         if(tax[i] == ';'){
1346                 
1347                                 int openParen = taxon.find_last_of('(');
1348                                 int closeParen = taxon.find_last_of(')');
1349                                 
1350                                 string newtaxon, confidence;
1351                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
1352                     string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
1353                     if (m->isNumeric1(confidenceScore)) {  //its a confidence
1354                         newtaxon = taxon.substr(0, openParen); //rip off confidence
1355                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
1356                     }else { //its part of the taxon
1357                         newtaxon = taxon;
1358                         confidence = "0";
1359                     }
1360                                 }else{
1361                                         newtaxon = taxon;
1362                                         confidence = "0";
1363                                 } 
1364                                 float con = 0;
1365                                 convert(confidence, con);
1366                                 
1367                                 map<string, float> temp;
1368                                 temp[newtaxon] = con;
1369                 
1370                                 t.push_back(temp);
1371                                 taxon = "";
1372                         }
1373                         else{
1374                                 taxon += tax[i];
1375                 
1376                         }
1377                 }
1378                 
1379                 return t;
1380         }
1381         catch(exception& e) {
1382                 m->errorOut(e, "GetLineageCommand", "getTaxons");
1383                 exit(1);
1384         }
1385 }
1386 //**********************************************************************************************************************
1387 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
1388 int GetLineageCommand::readAlign(){
1389         try {
1390                 string thisOutputDir = outputDir;
1391                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
1392         map<string, string> variables; 
1393                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
1394         variables["[extension]"] = m->getExtension(alignfile);
1395                 string outputFileName = getOutputFileName("alignreport", variables);
1396                 
1397                 ofstream out;
1398                 m->openOutputFile(outputFileName, out);
1399                 
1400
1401                 ifstream in;
1402                 m->openInputFile(alignfile, in);
1403                 string name, junk;
1404                 
1405                 bool wroteSomething = false;
1406                 
1407                 //read column headers
1408                 for (int i = 0; i < 16; i++) {  
1409                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1410                         else                    {       break;                  }
1411                 }
1412                 out << endl;
1413                 
1414                 while(!in.eof()){
1415                 
1416                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1417
1418
1419                         in >> name;                             //read from first column
1420                         
1421                         //if this name is in the accnos file
1422                         if (names.count(name) != 0) {
1423                                 wroteSomething = true;
1424                                 
1425                                 out << name << '\t';
1426                                 
1427                                 //read rest
1428                                 for (int i = 0; i < 15; i++) {  
1429                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1430                                         else                    {       break;                  }
1431                                 }
1432                                 out << endl;
1433                                 
1434                         }else {//still read just don't do anything with it
1435                                 //read rest
1436                                 for (int i = 0; i < 15; i++) {  
1437                                         if (!in.eof())  {       in >> junk;             }
1438                                         else                    {       break;                  }
1439                                 }
1440                         }
1441                         
1442                         m->gobble(in);
1443                 }
1444                 in.close();
1445                 out.close();
1446                 
1447                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
1448                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
1449                 
1450                 return 0;
1451                 
1452         }
1453         catch(exception& e) {
1454                 m->errorOut(e, "GetLineageCommand", "readAlign");
1455                 exit(1);
1456         }
1457 }
1458 //**********************************************************************************************************************
1459
1460
1461
1462