]> git.donarmstrong.com Git - mothur.git/blob - getlineagecommand.cpp
fixes while testing 1.33.0
[mothur.git] / getlineagecommand.cpp
1 /*
2  *  getlineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getlineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14 #include "inputdata.h"
15
16 //**********************************************************************************************************************
17 vector<string> GetLineageCommand::setParameters(){      
18         try {
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false, true); parameters.push_back(pfasta);
20         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false, true); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false, true); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false, true); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false, true); parameters.push_back(plist);
24         CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
25                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","taxonomy",false,false, true); parameters.push_back(ptaxonomy);
26         CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy);
27                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
28         CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
29                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true, true); parameters.push_back(ptaxon);
30                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
31                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
32                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33                 
34                 vector<string> myArray;
35                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
36                 return myArray;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "GetLineageCommand", "setParameters");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 string GetLineageCommand::getHelpString(){      
45         try {
46                 string helpString = "";
47                 helpString += "The get.lineage command reads a taxonomy or constaxonomy file and any of the following file types: fasta, name, group, count, list, shared or alignreport file. The constaxonomy can only be used with a shared or list file.\n";
48                 helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
49                 helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, shared, taxonomy, alignreport, label and dups.  You must provide taxonomy or constaxonomy unless you have a valid current taxonomy file.\n";
50                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
51                 helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
52                 helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
53                 helpString += "If they belong to the taxonomy and have confidences below those you provide the sequence will not be selected.\n";
54          helpString += "The label parameter is used to analyze specific labels in your input. \n";
55                 helpString += "The get.lineage command should be in the following format: get.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
56                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
57                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
58                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "GetLineageCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string GetLineageCommand::getOutputPattern(string type) {
69     try {
70         string pattern = "";
71         
72         if (type == "fasta")                {   pattern = "[filename],pick,[extension]";    }
73         else if (type == "taxonomy")        {   pattern = "[filename],pick,[extension]";    }
74         else if (type == "constaxonomy")    {   pattern = "[filename],pick,[extension]";    }
75         else if (type == "name")            {   pattern = "[filename],pick,[extension]";    }
76         else if (type == "group")           {   pattern = "[filename],pick,[extension]";    }
77         else if (type == "count")           {   pattern = "[filename],pick,[extension]";    }
78         else if (type == "list")            {   pattern = "[filename],[distance],pick,[extension]";    }
79         else if (type == "shared")          {   pattern = "[filename],[distance],pick,[extension]";    }
80         else if (type == "alignreport")     {   pattern = "[filename],pick.align.report";    }
81         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
82         
83         return pattern;
84     }
85     catch(exception& e) {
86         m->errorOut(e, "GetLineageCommand", "getOutputPattern");
87         exit(1);
88     }
89 }
90 //**********************************************************************************************************************
91 GetLineageCommand::GetLineageCommand(){ 
92         try {
93                 abort = true; calledHelp = true; 
94                 setParameters();
95                 vector<string> tempOutNames;
96                 outputTypes["fasta"] = tempOutNames;
97                 outputTypes["taxonomy"] = tempOutNames;
98                 outputTypes["name"] = tempOutNames;
99                 outputTypes["group"] = tempOutNames;
100                 outputTypes["alignreport"] = tempOutNames;
101                 outputTypes["list"] = tempOutNames;
102         outputTypes["count"] = tempOutNames;
103         outputTypes["constaxonomy"] = tempOutNames;
104         outputTypes["shared"] = tempOutNames;
105         }
106         catch(exception& e) {
107                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
108                 exit(1);
109         }
110 }
111 //**********************************************************************************************************************
112 GetLineageCommand::GetLineageCommand(string option)  {
113         try {
114                 abort = false; calledHelp = false;   
115                                 
116                 //allow user to run help
117                 if(option == "help") { help(); abort = true; calledHelp = true; }
118                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
119                 
120                 else {
121                         vector<string> myArray = setParameters();
122                         
123                         OptionParser parser(option);
124                         map<string,string> parameters = parser.getParameters();
125                         
126                         ValidParameters validParameter;
127                         map<string,string>::iterator it;
128                         
129                         //check to make sure all parameters are valid for command
130                         for (it = parameters.begin(); it != parameters.end(); it++) { 
131                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
132                         }
133                         
134                         //initialize outputTypes
135                         vector<string> tempOutNames;
136                         outputTypes["fasta"] = tempOutNames;
137                         outputTypes["taxonomy"] = tempOutNames;
138                         outputTypes["name"] = tempOutNames;
139                         outputTypes["group"] = tempOutNames;
140                         outputTypes["alignreport"] = tempOutNames;
141                         outputTypes["list"] = tempOutNames;
142             outputTypes["count"] = tempOutNames;
143             outputTypes["constaxonomy"] = tempOutNames;
144             outputTypes["shared"] = tempOutNames;
145
146                         //if the user changes the output directory command factory will send this info to us in the output parameter 
147                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
148                         
149                         //if the user changes the input directory command factory will send this info to us in the output parameter 
150                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
151                         if (inputDir == "not found"){   inputDir = "";          }
152                         else {
153                                 string path;
154                                 it = parameters.find("alignreport");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
160                                 }
161                                 
162                                 it = parameters.find("fasta");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
168                                 }
169                                 
170                                 it = parameters.find("list");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
176                                 }
177                                 
178                                 it = parameters.find("name");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
184                                 }
185                                 
186                                 it = parameters.find("group");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
192                                 }
193                                 
194                                 it = parameters.find("taxonomy");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
200                                 }
201                 
202                 it = parameters.find("count");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
208                                 }
209                 
210                 it = parameters.find("constaxonomy");
211                                 //user has given a template file
212                                 if(it != parameters.end()){
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
216                                 }
217                 
218                 it = parameters.find("shared");
219                                 //user has given a template file
220                                 if(it != parameters.end()){
221                                         path = m->hasPath(it->second);
222                                         //if the user has not given a path then, add inputdir. else leave path alone.
223                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
224                                 }
225                         }
226
227                         
228                         //check for required parameters                 
229                         fastafile = validParameter.validFile(parameters, "fasta", true);
230                         if (fastafile == "not open") { fastafile = ""; abort = true; }
231                         else if (fastafile == "not found") {  fastafile = "";  }
232                         else { m->setFastaFile(fastafile); }
233                         
234                         namefile = validParameter.validFile(parameters, "name", true);
235                         if (namefile == "not open") { namefile = ""; abort = true; }
236                         else if (namefile == "not found") {  namefile = "";  }  
237                         else { m->setNameFile(namefile); }
238                         
239                         groupfile = validParameter.validFile(parameters, "group", true);
240                         if (groupfile == "not open") { abort = true; }
241                         else if (groupfile == "not found") {  groupfile = "";  }        
242                         else { m->setGroupFile(groupfile); }
243                         
244                         alignfile = validParameter.validFile(parameters, "alignreport", true);
245                         if (alignfile == "not open") { abort = true; }
246                         else if (alignfile == "not found") {  alignfile = "";  }
247                         
248                         listfile = validParameter.validFile(parameters, "list", true);
249                         if (listfile == "not open") { abort = true; }
250                         else if (listfile == "not found") {  listfile = "";  }
251                         else { m->setListFile(listfile); }
252                         
253                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
254                         if (taxfile == "not open") { taxfile = ""; abort = true; }
255                         else if (taxfile == "not found") {  taxfile = "";               }
256                         else { m->setTaxonomyFile(taxfile); }
257             
258             sharedfile = validParameter.validFile(parameters, "shared", true);
259                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
260                         else if (sharedfile == "not found") {  sharedfile = "";         }
261                         else { m->setSharedFile(sharedfile); }
262
263             
264             constaxonomy = validParameter.validFile(parameters, "constaxonomy", true);
265                         if (constaxonomy == "not open") { constaxonomy = ""; abort = true; }
266                         else if (constaxonomy == "not found") {  constaxonomy = "";             }
267     
268             if ((constaxonomy == "") && (taxfile == "")) {
269                 taxfile = m->getTaxonomyFile();
270                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
271                 else {
272                     m->mothurOut("You have no current taxonomy file and did not provide a constaxonomy file. The taxonomy or constaxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
273                         }
274             
275                         string usedDups = "true";
276                         string temp = validParameter.validFile(parameters, "dups", false);      
277                         if (temp == "not found") { 
278                                 if (namefile != "") {  temp = "true";                                   }
279                                 else                            {  temp = "false"; usedDups = "";       }
280                         }
281                         dups = m->isTrue(temp);
282             
283             countfile = validParameter.validFile(parameters, "count", true);
284             if (countfile == "not open") { countfile = ""; abort = true; }
285             else if (countfile == "not found") { countfile = "";  }     
286             else { m->setCountTableFile(countfile); }
287             
288             if ((namefile != "") && (countfile != "")) {
289                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
290             }
291             
292             if ((groupfile != "") && (countfile != "")) {
293                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
294             }
295                         
296                         taxons = validParameter.validFile(parameters, "taxon", false);  
297                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
298                         else { 
299                                 //rip off quotes
300                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
301                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
302                         }
303                         m->splitAtChar(taxons, listOfTaxons, '-');
304                         
305                         if ((fastafile == "") && (constaxonomy == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, constaxonomy, shared or listfile."); m->mothurOutEndLine(); abort = true; }
306             
307             if ((constaxonomy != "") && ((fastafile != "") || (namefile != "") || (groupfile != "") || (alignfile != "") || (taxfile != "") || (countfile != ""))) {
308                 m->mothurOut("[ERROR]: can only use constaxonomy file with a list or shared file, aborting.\n");  abort = true;
309             }
310             
311             if ((constaxonomy != "") && (taxfile != "")) {
312                 m->mothurOut("[ERROR]: Choose only one: taxonomy or constaxonomy, aborting.\n"); abort = true;
313             }
314             
315             if ((sharedfile != "") && (taxfile != "")) {
316                 m->mothurOut("[ERROR]: sharedfile can only be used with constaxonomy file, aborting.\n"); abort = true;
317             }
318             
319             if ((sharedfile != "") || (listfile != "")) {
320                 label = validParameter.validFile(parameters, "label", false);
321                 if (label == "not found") { label = ""; m->mothurOut("[WARNING]: You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); }
322             }
323             
324             if (countfile == "") {
325                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
326                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
327                     parser.getNameFile(files);
328                 }
329             }
330                 }
331
332         }
333         catch(exception& e) {
334                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
335                 exit(1);
336         }
337 }
338 //**********************************************************************************************************************
339
340 int GetLineageCommand::execute(){
341         try {
342                 
343                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
344                 
345                 if (m->control_pressed) { return 0; }
346         
347         if (countfile != "") {
348             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
349                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
350             }
351         }
352                 
353                 //read through the correct file and output lines you want to keep
354                 if (taxfile != "")                      {
355             readTax(); //fills the set of names to get
356             if (namefile != "")                 {               readName();             }
357             if (fastafile != "")                {               readFasta();    }
358             if (countfile != "")                {               readCount();    }
359             if (groupfile != "")                {               readGroup();    }
360             if (alignfile != "")                {               readAlign();    }
361             if (listfile != "")                 {               readList();             }
362
363         }else {
364             readConsTax();
365             if (listfile != "")                 {               readConsList();         }
366             if (sharedfile != "")               {               readShared();           }
367         }
368                                 
369                 
370                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
371                 
372                 if (outputNames.size() != 0) {
373                         m->mothurOutEndLine();
374                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
375                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
376                         m->mothurOutEndLine();
377                         
378                         //set fasta file as new current fastafile
379                         string current = "";
380                         itTypes = outputTypes.find("fasta");
381                         if (itTypes != outputTypes.end()) {
382                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
383                         }
384                         
385                         itTypes = outputTypes.find("name");
386                         if (itTypes != outputTypes.end()) {
387                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
388                         }
389                         
390                         itTypes = outputTypes.find("group");
391                         if (itTypes != outputTypes.end()) {
392                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
393                         }
394                         
395                         itTypes = outputTypes.find("list");
396                         if (itTypes != outputTypes.end()) {
397                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
398                         }
399             
400             itTypes = outputTypes.find("shared");
401                         if (itTypes != outputTypes.end()) {
402                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
403                         }
404                         
405                         itTypes = outputTypes.find("taxonomy");
406                         if (itTypes != outputTypes.end()) {
407                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
408                         }
409                         
410             itTypes = outputTypes.find("count");
411                         if (itTypes != outputTypes.end()) {
412                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
413                         }
414                 }
415                 
416                 return 0;               
417         }
418
419         catch(exception& e) {
420                 m->errorOut(e, "GetLineageCommand", "execute");
421                 exit(1);
422         }
423 }
424
425 //**********************************************************************************************************************
426 int GetLineageCommand::readFasta(){
427         try {
428                 string thisOutputDir = outputDir;
429                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
430                 map<string, string> variables; 
431         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
432         variables["[extension]"] = m->getExtension(fastafile);
433                 string outputFileName = getOutputFileName("fasta", variables);
434                 ofstream out;
435                 m->openOutputFile(outputFileName, out);
436                 
437                 
438                 ifstream in;
439                 m->openInputFile(fastafile, in);
440                 string name;
441                 
442                 bool wroteSomething = false;
443                 
444                 while(!in.eof()){
445                 
446                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
447                         
448                         Sequence currSeq(in);
449                         name = currSeq.getName();
450                         
451                         if (name != "") {
452                                 //if this name is in the accnos file
453                                 if (names.count(name) != 0) {
454                                         wroteSomething = true;
455                                         
456                                         currSeq.printSequence(out);
457                                 }
458                         }
459                         m->gobble(in);
460                 }
461                 in.close();     
462                 out.close();
463                 
464                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
465                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
466                 
467                 return 0;
468
469         }
470         catch(exception& e) {
471                 m->errorOut(e, "GetLineageCommand", "readFasta");
472                 exit(1);
473         }
474 }
475 //**********************************************************************************************************************
476 int GetLineageCommand::readCount(){
477         try {
478                 string thisOutputDir = outputDir;
479                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
480                 map<string, string> variables; 
481                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
482         variables["[extension]"] = m->getExtension(countfile);
483                 string outputFileName = getOutputFileName("count", variables);
484                 
485                 ofstream out;
486                 m->openOutputFile(outputFileName, out);
487                 
488                 ifstream in;
489                 m->openInputFile(countfile, in);
490                 
491                 bool wroteSomething = false;
492                 
493         string headers = m->getline(in); m->gobble(in);
494         out << headers << endl;
495         
496         string name, rest; int thisTotal;
497         while (!in.eof()) {
498             
499             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
500             
501             in >> name; m->gobble(in); 
502             in >> thisTotal; m->gobble(in);
503             rest = m->getline(in); m->gobble(in);
504             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
505             
506             if (names.count(name) != 0) {
507                 out << name << '\t' << thisTotal << '\t' << rest << endl;
508                 wroteSomething = true;
509             }
510         }
511         in.close();
512                 out.close();
513         
514         //check for groups that have been eliminated
515         CountTable ct;
516         if (ct.testGroups(outputFileName)) {
517             ct.readTable(outputFileName, true, false);
518             ct.printTable(outputFileName);
519         }
520
521                 
522                 if (wroteSomething == false) {  m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
523                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
524                        
525                 return 0;
526         }
527         catch(exception& e) {
528                 m->errorOut(e, "GetLineageCommand", "readCount");
529                 exit(1);
530         }
531 }
532 //**********************************************************************************************************************
533 int GetLineageCommand::readList(){
534         try {
535                 string thisOutputDir = outputDir;
536                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
537                 map<string, string> variables; 
538         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
539         variables["[extension]"] = m->getExtension(listfile);
540                                 
541                 ifstream in;
542                 m->openInputFile(listfile, in);
543                 
544                 bool wroteSomething = false;
545                 
546                 while(!in.eof()){
547
548                         //read in list vector
549                         ListVector list(in);
550                         
551                         //make a new list vector
552                         ListVector newList;
553                         newList.setLabel(list.getLabel());
554             
555             variables["[distance]"] = list.getLabel();
556             string outputFileName = getOutputFileName("list", variables);
557                         
558                         ofstream out;
559                         m->openOutputFile(outputFileName, out);
560                         outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
561             
562             if (m->control_pressed) { in.close(); out.close(); return 0; }
563             
564             vector<string> binLabels = list.getLabels();
565             vector<string> newBinLabels;
566                         
567                         //for each bin
568                         for (int i = 0; i < list.getNumBins(); i++) {
569                         
570                                 //parse out names that are in accnos file
571                                 string binnames = list.get(i);
572                 vector<string> bnames;
573                 m->splitAtComma(binnames, bnames);
574                                 
575                                 string newNames = "";
576                 for (int j = 0; j < bnames.size(); j++) {
577                                         string name = bnames[j];
578                                         //if that name is in the .accnos file, add it
579                                         if (names.count(name) != 0) {  newNames += name + ",";  }
580                                 }
581
582
583                                 //if there are names in this bin add to new list
584                                 if (newNames != "") { 
585                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
586                                         newList.push_back(newNames);
587                     newBinLabels.push_back(binLabels[i]);
588                                 }
589                         }
590                                 
591                         //print new listvector
592                         if (newList.getNumBins() != 0) {
593                                 wroteSomething = true;
594                                 newList.setLabels(newBinLabels);
595                 newList.printHeaders(out);
596                                 newList.print(out);
597                         }
598                         
599                         m->gobble(in);
600             out.close();
601                 }
602                 in.close();     
603                 
604                 
605                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
606                 
607                 return 0;
608
609         }
610         catch(exception& e) {
611                 m->errorOut(e, "GetLineageCommand", "readList");
612                 exit(1);
613         }
614 }
615 //**********************************************************************************************************************
616 int GetLineageCommand::readConsList(){
617         try {
618                 getListVector();
619         
620         if (m->control_pressed) { delete list; return 0;}
621         
622         ListVector newList;
623         newList.setLabel(list->getLabel());
624         int selectedCount = 0;
625         bool wroteSomething = false;
626         string snumBins = toString(list->getNumBins());
627         
628         vector<string> binLabels = list->getLabels();
629         vector<string> newBinLabels;
630         for (int i = 0; i < list->getNumBins(); i++) {
631             
632             if (m->control_pressed) { delete list; return 0;}
633             
634             //create a label for this otu
635             string otuLabel = "Otu";
636             string sbinNumber = toString(i+1);
637             if (sbinNumber.length() < snumBins.length()) {
638                 int diff = snumBins.length() - sbinNumber.length();
639                 for (int h = 0; h < diff; h++) { otuLabel += "0"; }
640             }
641             otuLabel += sbinNumber;
642             
643             if (names.count(m->getSimpleLabel(otuLabel)) != 0) {
644                                 selectedCount++;
645                 newList.push_back(list->get(i));
646                 newBinLabels.push_back(binLabels[i]);
647             }
648         }
649         
650         string thisOutputDir = outputDir;
651                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
652         map<string, string> variables;
653                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
654         variables["[extension]"] = m->getExtension(listfile);
655         variables["[distance]"] = list->getLabel();
656                 string outputFileName = getOutputFileName("list", variables);
657                 ofstream out;
658                 m->openOutputFile(outputFileName, out);
659         
660                 delete list;
661         //print new listvector
662         if (newList.getNumBins() != 0) {
663             wroteSomething = true;
664             newList.setLabels(newBinLabels);
665             newList.printHeaders(out);
666             newList.print(out);
667         }
668                 out.close();
669                 
670                 if (wroteSomething == false) { m->mothurOut("Your file does not contain OTUs from " + taxons + "."); m->mothurOutEndLine();  }
671                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
672                 
673                 m->mothurOut("Selected " + toString(selectedCount) + " OTUs from your list file."); m->mothurOutEndLine();
674         
675                 return 0;
676         
677         }
678         catch(exception& e) {
679                 m->errorOut(e, "GetLineageCommand", "readConsList");
680                 exit(1);
681         }
682 }
683 //**********************************************************************************************************************
684 int GetLineageCommand::getListVector(){
685         try {
686                 InputData input(listfile, "list");
687                 list = input.getListVector();
688                 string lastLabel = list->getLabel();
689                 
690                 if (label == "") { label = lastLabel;  return 0; }
691                 
692                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
693                 set<string> labels; labels.insert(label);
694                 set<string> processedLabels;
695                 set<string> userLabels = labels;
696                 
697                 //as long as you are not at the end of the file or done wih the lines you want
698                 while((list != NULL) && (userLabels.size() != 0)) {
699                         if (m->control_pressed) {  return 0;  }
700                         
701                         if(labels.count(list->getLabel()) == 1){
702                                 processedLabels.insert(list->getLabel());
703                                 userLabels.erase(list->getLabel());
704                                 break;
705                         }
706                         
707                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
708                                 string saveLabel = list->getLabel();
709                                 
710                                 delete list;
711                                 list = input.getListVector(lastLabel);
712                                 
713                                 processedLabels.insert(list->getLabel());
714                                 userLabels.erase(list->getLabel());
715                                 
716                                 //restore real lastlabel to save below
717                                 list->setLabel(saveLabel);
718                                 break;
719                         }
720                         
721                         lastLabel = list->getLabel();
722                         
723                         //get next line to process
724                         //prevent memory leak
725                         delete list;
726                         list = input.getListVector();
727                 }
728                 
729                 
730                 if (m->control_pressed) {  return 0;  }
731                 
732                 //output error messages about any remaining user labels
733                 set<string>::iterator it;
734                 bool needToRun = false;
735                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
736                         m->mothurOut("Your file does not include the label " + *it);
737                         if (processedLabels.count(lastLabel) != 1) {
738                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
739                                 needToRun = true;
740                         }else {
741                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
742                         }
743                 }
744                 
745                 //run last label if you need to
746                 if (needToRun == true)  {
747                         delete list;
748                         list = input.getListVector(lastLabel);
749                 }
750                 
751                 return 0;
752         }
753         catch(exception& e) {
754                 m->errorOut(e, "GetLineageCommand", "getListVector");
755                 exit(1);
756         }
757 }
758
759 //**********************************************************************************************************************
760 int GetLineageCommand::readShared(){
761         try {
762         
763         getShared();
764         
765         if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
766         
767         vector<string> newLabels;
768         
769         //create new "filtered" lookup
770         vector<SharedRAbundVector*> newLookup;
771         for (int i = 0; i < lookup.size(); i++) {
772             SharedRAbundVector* temp = new SharedRAbundVector();
773                         temp->setLabel(lookup[i]->getLabel());
774                         temp->setGroup(lookup[i]->getGroup());
775                         newLookup.push_back(temp);
776         }
777         
778         bool wroteSomething = false;
779         int numSelected = 0;
780         for (int i = 0; i < lookup[0]->getNumBins(); i++) {
781             
782             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
783             
784             //is this otu on the list
785             if (names.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) != 0) {
786                 numSelected++; wroteSomething = true;
787                 newLabels.push_back(m->currentSharedBinLabels[i]);
788                 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
789                     newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
790                 }
791             }
792         }
793         
794         string thisOutputDir = outputDir;
795                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
796         map<string, string> variables;
797                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
798         variables["[extension]"] = m->getExtension(sharedfile);
799         variables["[distance]"] = lookup[0]->getLabel();
800                 string outputFileName = getOutputFileName("shared", variables);
801         ofstream out;
802                 m->openOutputFile(outputFileName, out);
803                 outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
804         
805                 for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
806         
807         m->currentSharedBinLabels = newLabels;
808         
809                 newLookup[0]->printHeaders(out);
810                 
811                 for (int i = 0; i < newLookup.size(); i++) {
812                         out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t';
813                         newLookup[i]->print(out);
814                 }
815                 out.close();
816         
817         for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
818         
819         if (wroteSomething == false) { m->mothurOut("Your file does not contain OTUs from " + taxons + "."); m->mothurOutEndLine();  }
820         
821                 m->mothurOut("Selected " + toString(numSelected) + " OTUs from your shared file."); m->mothurOutEndLine();
822         
823         return 0;
824     }
825         catch(exception& e) {
826                 m->errorOut(e, "GetLineageCommand", "readShared");
827                 exit(1);
828         }
829 }
830 //**********************************************************************************************************************
831 int GetLineageCommand::getShared(){
832         try {
833                 InputData input(sharedfile, "sharedfile");
834                 lookup = input.getSharedRAbundVectors();
835                 string lastLabel = lookup[0]->getLabel();
836                 
837                 if (label == "") { label = lastLabel;  return 0; }
838                 
839                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
840                 set<string> labels; labels.insert(label);
841                 set<string> processedLabels;
842                 set<string> userLabels = labels;
843                 
844                 //as long as you are not at the end of the file or done wih the lines you want
845                 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
846                         if (m->control_pressed) {   return 0;  }
847                         
848                         if(labels.count(lookup[0]->getLabel()) == 1){
849                                 processedLabels.insert(lookup[0]->getLabel());
850                                 userLabels.erase(lookup[0]->getLabel());
851                                 break;
852                         }
853                         
854                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
855                                 string saveLabel = lookup[0]->getLabel();
856                                 
857                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
858                                 lookup = input.getSharedRAbundVectors(lastLabel);
859                                 
860                                 processedLabels.insert(lookup[0]->getLabel());
861                                 userLabels.erase(lookup[0]->getLabel());
862                                 
863                                 //restore real lastlabel to save below
864                                 lookup[0]->setLabel(saveLabel);
865                                 break;
866                         }
867                         
868                         lastLabel = lookup[0]->getLabel();
869                         
870                         //get next line to process
871                         //prevent memory leak
872                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
873                         lookup = input.getSharedRAbundVectors();
874                 }
875                 
876                 
877                 if (m->control_pressed) {  return 0;  }
878                 
879                 //output error messages about any remaining user labels
880                 set<string>::iterator it;
881                 bool needToRun = false;
882                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
883                         m->mothurOut("Your file does not include the label " + *it);
884                         if (processedLabels.count(lastLabel) != 1) {
885                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
886                                 needToRun = true;
887                         }else {
888                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
889                         }
890                 }
891                 
892                 //run last label if you need to
893                 if (needToRun == true)  {
894                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } }
895                         lookup = input.getSharedRAbundVectors(lastLabel);
896                 }
897                 
898                 return 0;
899         }
900         catch(exception& e) {
901                 m->errorOut(e, "GetLineageCommand", "getShared");
902                 exit(1);
903         }
904 }
905
906 //**********************************************************************************************************************
907 int GetLineageCommand::readName(){
908         try {
909                 string thisOutputDir = outputDir;
910                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
911                 map<string, string> variables; 
912                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
913         variables["[extension]"] = m->getExtension(namefile);
914                 string outputFileName = getOutputFileName("name", variables);
915                 ofstream out;
916                 m->openOutputFile(outputFileName, out);
917                 
918
919                 ifstream in;
920                 m->openInputFile(namefile, in);
921                 string name, firstCol, secondCol;
922                 
923                 bool wroteSomething = false;
924                 
925                 
926                 while(!in.eof()){
927                 
928                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
929
930                         in >> firstCol;                         
931                         in >> secondCol;
932                         
933                         string hold = "";
934                         if (dups) { hold = secondCol; }
935                         
936                         vector<string> parsedNames;
937                         m->splitAtComma(secondCol, parsedNames);
938                         
939                         vector<string> validSecond;
940                         for (int i = 0; i < parsedNames.size(); i++) {
941                                 if (names.count(parsedNames[i]) != 0) {
942                                         validSecond.push_back(parsedNames[i]);
943                                 }
944                         }
945
946                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
947                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
948                                 out << firstCol << '\t' << hold << endl;
949                                 wroteSomething = true;
950                         }else {
951                                 //if the name in the first column is in the set then print it and any other names in second column also in set
952                                 if (names.count(firstCol) != 0) {
953                                 
954                                         wroteSomething = true;
955                                         
956                                         out << firstCol << '\t';
957                                         
958                                         //you know you have at least one valid second since first column is valid
959                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
960                                         out << validSecond[validSecond.size()-1] << endl;
961                                         
962                                 
963                                 //make first name in set you come to first column and then add the remaining names to second column
964                                 }else {
965                                         //you want part of this row
966                                         if (validSecond.size() != 0) {
967                                         
968                                                 wroteSomething = true;
969                                                 
970                                                 out << validSecond[0] << '\t';
971                                         
972                                                 //you know you have at least one valid second since first column is valid
973                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
974                                                 out << validSecond[validSecond.size()-1] << endl;
975                                         }
976                                 }
977                         }
978                         m->gobble(in);
979                 }
980                 in.close();
981                 out.close();
982                 
983                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
984                 outputNames.push_back(outputFileName);  outputTypes["name"].push_back(outputFileName);
985                 
986                 return 0;
987                 
988         }
989         catch(exception& e) {
990                 m->errorOut(e, "GetLineageCommand", "readName");
991                 exit(1);
992         }
993 }
994
995 //**********************************************************************************************************************
996 int GetLineageCommand::readGroup(){
997         try {
998                 string thisOutputDir = outputDir;
999                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
1000                 map<string, string> variables; 
1001                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
1002         variables["[extension]"] = m->getExtension(groupfile);
1003                 string outputFileName = getOutputFileName("group", variables);
1004                 ofstream out;
1005                 m->openOutputFile(outputFileName, out);
1006                 
1007
1008                 ifstream in;
1009                 m->openInputFile(groupfile, in);
1010                 string name, group;
1011                 
1012                 bool wroteSomething = false;
1013                 
1014                 while(!in.eof()){
1015
1016                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1017
1018
1019                         in >> name;                             //read from first column
1020                         in >> group;                    //read from second column
1021                         
1022                         //if this name is in the accnos file
1023                         if (names.count(name) != 0) {
1024                                 wroteSomething = true;
1025                                 
1026                                 out << name << '\t' << group << endl;
1027                         }
1028                                         
1029                         m->gobble(in);
1030                 }
1031                 in.close();
1032                 out.close();
1033                 
1034                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
1035                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
1036                 
1037                 return 0;
1038
1039         }
1040         catch(exception& e) {
1041                 m->errorOut(e, "GetLineageCommand", "readGroup");
1042                 exit(1);
1043         }
1044 }
1045 //**********************************************************************************************************************
1046 int GetLineageCommand::readTax(){
1047         try {
1048                 string thisOutputDir = outputDir;
1049                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1050                 map<string, string> variables; 
1051                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1052         variables["[extension]"] = m->getExtension(taxfile);
1053                 string outputFileName = getOutputFileName("taxonomy", variables);
1054                 ofstream out;
1055                 m->openOutputFile(outputFileName, out);
1056                 
1057                 ifstream in;
1058                 m->openInputFile(taxfile, in);
1059                 string name, tax;
1060                 
1061                 //bool wroteSomething = false;
1062                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1063                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1064                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1065                 
1066                 for (int i = 0; i < listOfTaxons.size(); i++) {
1067                         noConfidenceTaxons[i] = listOfTaxons[i];
1068                         int hasConPos = listOfTaxons[i].find_first_of('(');
1069                         if (hasConPos != string::npos) {  
1070                                 taxonsHasConfidence[i] = true; 
1071                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
1072                                 noConfidenceTaxons[i] = listOfTaxons[i];
1073                                 m->removeConfidences(noConfidenceTaxons[i]);
1074                         }
1075                 }
1076                 
1077                 
1078                 while(!in.eof()){
1079
1080                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1081
1082                         in >> name;                             //read from first column
1083                         in >> tax;                      //read from second column
1084                         
1085             string noQuotesTax = m->removeQuotes(tax);
1086             
1087                         for (int j = 0; j < listOfTaxons.size(); j++) {
1088                                                         
1089                                 string newtax = noQuotesTax;
1090                         
1091                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1092                                 if (!taxonsHasConfidence[j]) {
1093                                         int hasConfidences = noQuotesTax.find_first_of('(');
1094                                         if (hasConfidences != string::npos) { 
1095                                                 newtax = noQuotesTax;
1096                                                 m->removeConfidences(newtax);
1097                                         }
1098                                 
1099                                         int pos = newtax.find(noConfidenceTaxons[j]);
1100                                 
1101                                         if (pos != string::npos) { //this sequence contains the taxon the user wants
1102                                                 names.insert(name); 
1103                                                 out << name << '\t' << tax << endl;
1104                                                 //since you belong to at least one of the taxons we want you are included so no need to search for other
1105                                                 break;
1106                                         }
1107                                 }else{//if listOfTaxons[i] has them and you don't them remove taxons
1108                                         int hasConfidences = noQuotesTax.find_first_of('(');
1109                                         if (hasConfidences == string::npos) { 
1110                                         
1111                                                 int pos = newtax.find(noConfidenceTaxons[j]);
1112                                         
1113                                                 if (pos != string::npos) { //this sequence contains the taxon the user wants
1114                                                         names.insert(name);
1115                                                         out << name << '\t' << tax << endl;
1116                                                         //since you belong to at least one of the taxons we want you are included so no need to search for other
1117                                                         break;
1118                                                 }
1119                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1120                                                 //first remove confidences from both and see if the taxonomy exists
1121                                         
1122                                                 string noNewTax = noQuotesTax;
1123                                                 int hasConfidences = noQuotesTax.find_first_of('(');
1124                                                 if (hasConfidences != string::npos) { 
1125                                                         noNewTax = noQuotesTax;
1126                                                         m->removeConfidences(noNewTax);
1127                                                 }
1128                                         
1129                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
1130                                         
1131                                                 if (pos != string::npos) { //if yes, then are the confidences okay
1132                                                 
1133                                                         bool good = true;
1134                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
1135                                                 
1136                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1137                                                         //we want to "line them up", so we will find the the index where the searchstring starts
1138                                                         int index = 0;
1139                                                         for (int i = 0; i < usersTaxon.size(); i++) {
1140                                                         
1141                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
1142                                                                         index = i;  
1143                                                                         int spot = 0;
1144                                                                         bool goodspot = true;
1145                                                                         //is this really the start, or are we dealing with a taxon of the same name?
1146                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1147                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1148                                                                                 else { spot++; }
1149                                                                         }
1150                                                                 
1151                                                                         if (goodspot) { break; }
1152                                                                 }
1153                                                         }
1154                                                 
1155                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
1156                                                         
1157                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1158                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1159                                                                                 good = false;
1160                                                                                 break;
1161                                                                         }
1162                                                                 }else {
1163                                                                         good = false;
1164                                                                         break;
1165                                                                 }
1166                                                         }
1167                                                 
1168                                                         //passed the test so add you
1169                                                         if (good) {
1170                                                                 names.insert(name);
1171                                                                 out << name << '\t' << tax << endl;
1172                                                                 break;
1173                                                         }
1174                                                 }
1175                                         }
1176                                 }
1177                         
1178                         }
1179                         
1180                         m->gobble(in);
1181                 }
1182                 in.close();
1183                 out.close();
1184                 
1185                 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
1186                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
1187                         
1188                 return 0;
1189
1190         }
1191         catch(exception& e) {
1192                 m->errorOut(e, "GetLineageCommand", "readTax");
1193                 exit(1);
1194         }
1195 }
1196 //**********************************************************************************************************************
1197 int GetLineageCommand::readConsTax(){
1198         try {
1199                 string thisOutputDir = outputDir;
1200                 if (outputDir == "") {  thisOutputDir += m->hasPath(constaxonomy);  }
1201                 map<string, string> variables;
1202                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomy));
1203         variables["[extension]"] = m->getExtension(constaxonomy);
1204                 string outputFileName = getOutputFileName("constaxonomy", variables);
1205                 ofstream out;
1206                 m->openOutputFile(outputFileName, out);
1207                 
1208                 ifstream in;
1209                 m->openInputFile(constaxonomy, in);
1210                 string otuLabel, tax;
1211         int numReps;
1212         
1213         //read headers
1214         string headers = m->getline(in);
1215         out << headers << endl;
1216                 
1217                 //bool wroteSomething = false;
1218                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1219                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1220                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1221                 
1222                 for (int i = 0; i < listOfTaxons.size(); i++) {
1223                         noConfidenceTaxons[i] = listOfTaxons[i];
1224                         int hasConPos = listOfTaxons[i].find_first_of('(');
1225                         if (hasConPos != string::npos) {
1226                                 taxonsHasConfidence[i] = true;
1227                                 searchTaxons[i] = getTaxons(listOfTaxons[i]);
1228                                 noConfidenceTaxons[i] = listOfTaxons[i];
1229                                 m->removeConfidences(noConfidenceTaxons[i]);
1230                         }
1231                 }
1232                 
1233         
1234                 while(!in.eof()){
1235             
1236                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1237             
1238                         in >> otuLabel;                 m->gobble(in);
1239             in >> numReps;          m->gobble(in);
1240                         in >> tax;              m->gobble(in);
1241                         
1242             string noQuotesTax = m->removeQuotes(tax);
1243             
1244                         for (int j = 0; j < listOfTaxons.size(); j++) {
1245                 
1246                                 string newtax = noQuotesTax;
1247                 
1248                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1249                                 if (!taxonsHasConfidence[j]) {
1250                                         int hasConfidences = noQuotesTax.find_first_of('(');
1251                                         if (hasConfidences != string::npos) {
1252                                                 newtax = noQuotesTax;
1253                                                 m->removeConfidences(newtax);
1254                                         }
1255                     
1256                                         int pos = newtax.find(noConfidenceTaxons[j]);
1257                     
1258                                         if (pos != string::npos) { //this sequence contains the taxon the user wants
1259                                                 names.insert(m->getSimpleLabel(otuLabel));
1260                                                 out << otuLabel << '\t' << numReps << '\t' << tax << endl;
1261                                                 //since you belong to at least one of the taxons we want you are included so no need to search for other
1262                                                 break;
1263                                         }
1264                                 }else{//if listOfTaxons[i] has them and you don't them remove taxons
1265                                         int hasConfidences = noQuotesTax.find_first_of('(');
1266                                         if (hasConfidences == string::npos) {
1267                         
1268                                                 int pos = newtax.find(noConfidenceTaxons[j]);
1269                         
1270                                                 if (pos != string::npos) { //this sequence contains the taxon the user wants
1271                                                         names.insert(m->getSimpleLabel(otuLabel));
1272                                                         out << otuLabel << '\t' << numReps << '\t' << tax << endl;
1273                                                         //since you belong to at least one of the taxons we want you are included so no need to search for other
1274                                                         break;
1275                                                 }
1276                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1277                                                 //first remove confidences from both and see if the taxonomy exists
1278                         
1279                                                 string noNewTax = noQuotesTax;
1280                                                 int hasConfidences = noQuotesTax.find_first_of('(');
1281                                                 if (hasConfidences != string::npos) {
1282                                                         noNewTax = noQuotesTax;
1283                                                         m->removeConfidences(noNewTax);
1284                                                 }
1285                         
1286                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
1287                         
1288                                                 if (pos != string::npos) { //if yes, then are the confidences okay
1289                             
1290                                                         bool good = true;
1291                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
1292                             
1293                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1294                                                         //we want to "line them up", so we will find the the index where the searchstring starts
1295                                                         int index = 0;
1296                                                         for (int i = 0; i < usersTaxon.size(); i++) {
1297                                 
1298                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
1299                                                                         index = i;
1300                                                                         int spot = 0;
1301                                                                         bool goodspot = true;
1302                                                                         //is this really the start, or are we dealing with a taxon of the same name?
1303                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1304                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1305                                                                                 else { spot++; }
1306                                                                         }
1307                                     
1308                                                                         if (goodspot) { break; }
1309                                                                 }
1310                                                         }
1311                             
1312                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
1313                                 
1314                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1315                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1316                                                                                 good = false;
1317                                                                                 break;
1318                                                                         }
1319                                                                 }else {
1320                                                                         good = false;
1321                                                                         break;
1322                                                                 }
1323                                                         }
1324                             
1325                                                         //passed the test so add you
1326                                                         if (good) {
1327                                                                 names.insert(m->getSimpleLabel(otuLabel));
1328                                                                 out << otuLabel << '\t' << numReps << '\t' << tax << endl;
1329                                                                 break;
1330                                                         }
1331                                                 }
1332                                         }
1333                                 }
1334                 
1335                         }
1336                 }
1337                 in.close();
1338                 out.close();
1339                 
1340                 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any OTUs from " + taxons + "."); m->mothurOutEndLine();  }
1341                 outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName);
1342         
1343                 return 0;
1344         
1345         }
1346         catch(exception& e) {
1347                 m->errorOut(e, "GetLineageCommand", "readConsTax");
1348                 exit(1);
1349         }
1350 }
1351 /**************************************************************************************************/
1352 vector< map<string, float> > GetLineageCommand::getTaxons(string tax) {
1353         try {
1354         
1355                 vector< map<string, float> > t;
1356                 string taxon = "";
1357                 int taxLength = tax.length();
1358         
1359                 for(int i=0;i<taxLength;i++){
1360                         if(tax[i] == ';'){
1361                 
1362                                 int openParen = taxon.find_last_of('(');
1363                                 int closeParen = taxon.find_last_of(')');
1364                                 
1365                                 string newtaxon, confidence;
1366                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
1367                     string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
1368                     if (m->isNumeric1(confidenceScore)) {  //its a confidence
1369                         newtaxon = taxon.substr(0, openParen); //rip off confidence
1370                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
1371                     }else { //its part of the taxon
1372                         newtaxon = taxon;
1373                         confidence = "0";
1374                     }
1375                                 }else{
1376                                         newtaxon = taxon;
1377                                         confidence = "0";
1378                                 } 
1379                                 float con = 0;
1380                                 convert(confidence, con);
1381                                 
1382                                 map<string, float> temp;
1383                                 temp[newtaxon] = con;
1384                 
1385                                 t.push_back(temp);
1386                                 taxon = "";
1387                         }
1388                         else{
1389                                 taxon += tax[i];
1390                 
1391                         }
1392                 }
1393                 
1394                 return t;
1395         }
1396         catch(exception& e) {
1397                 m->errorOut(e, "GetLineageCommand", "getTaxons");
1398                 exit(1);
1399         }
1400 }
1401 //**********************************************************************************************************************
1402 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
1403 int GetLineageCommand::readAlign(){
1404         try {
1405                 string thisOutputDir = outputDir;
1406                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
1407         map<string, string> variables; 
1408                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
1409         variables["[extension]"] = m->getExtension(alignfile);
1410                 string outputFileName = getOutputFileName("alignreport", variables);
1411                 
1412                 ofstream out;
1413                 m->openOutputFile(outputFileName, out);
1414                 
1415
1416                 ifstream in;
1417                 m->openInputFile(alignfile, in);
1418                 string name, junk;
1419                 
1420                 bool wroteSomething = false;
1421                 
1422                 //read column headers
1423                 for (int i = 0; i < 16; i++) {  
1424                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1425                         else                    {       break;                  }
1426                 }
1427                 out << endl;
1428                 
1429                 while(!in.eof()){
1430                 
1431                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1432
1433
1434                         in >> name;                             //read from first column
1435                         
1436                         //if this name is in the accnos file
1437                         if (names.count(name) != 0) {
1438                                 wroteSomething = true;
1439                                 
1440                                 out << name << '\t';
1441                                 
1442                                 //read rest
1443                                 for (int i = 0; i < 15; i++) {  
1444                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1445                                         else                    {       break;                  }
1446                                 }
1447                                 out << endl;
1448                                 
1449                         }else {//still read just don't do anything with it
1450                                 //read rest
1451                                 for (int i = 0; i < 15; i++) {  
1452                                         if (!in.eof())  {       in >> junk;             }
1453                                         else                    {       break;                  }
1454                                 }
1455                         }
1456                         
1457                         m->gobble(in);
1458                 }
1459                 in.close();
1460                 out.close();
1461                 
1462                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
1463                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
1464                 
1465                 return 0;
1466                 
1467         }
1468         catch(exception& e) {
1469                 m->errorOut(e, "GetLineageCommand", "readAlign");
1470                 exit(1);
1471         }
1472 }
1473 //**********************************************************************************************************************
1474
1475
1476
1477