]> git.donarmstrong.com Git - mothur.git/blob - getlineagecommand.cpp
added count file to trim.seqs, get.groups, get.lineage, get.seqs, heatmap.sim, list...
[mothur.git] / getlineagecommand.cpp
1 /*
2  *  getlineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getlineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetLineageCommand::setParameters(){      
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
19         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
21                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
23                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
24                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "",false,true); parameters.push_back(ptaxon);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "GetLineageCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string GetLineageCommand::getHelpString(){      
40         try {
41                 string helpString = "";
42                 helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
43                 helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
44                 helpString += "The get.lineage command parameters are taxon, fasta, name, group, count, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
45                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
46                 helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
47                 helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
48                 helpString += "If they belong to the taxonomy and have confidences below those you provide the sequence will not be selected.\n";
49                 helpString += "The get.lineage command should be in the following format: get.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
50                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
51                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
52                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "GetLineageCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string GetLineageCommand::getOutputFileNameTag(string type, string inputName=""){       
63         try {
64         string outputFileName = "";
65                 map<string, vector<string> >::iterator it;
66         
67         //is this a type this command creates
68         it = outputTypes.find(type);
69         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
70         else {
71             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
72             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
73             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
74             else if (type == "count")       {   outputFileName =  "pick.count.table";                    }  
75             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
76             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
77             else if (type == "alignreport") {   outputFileName =  "pick.align.report";   }
78             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
79         }
80         return outputFileName;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "GetLineageCommand", "getOutputFileNameTag");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 GetLineageCommand::GetLineageCommand(){ 
89         try {
90                 abort = true; calledHelp = true; 
91                 setParameters();
92                 vector<string> tempOutNames;
93                 outputTypes["fasta"] = tempOutNames;
94                 outputTypes["taxonomy"] = tempOutNames;
95                 outputTypes["name"] = tempOutNames;
96                 outputTypes["group"] = tempOutNames;
97                 outputTypes["alignreport"] = tempOutNames;
98                 outputTypes["list"] = tempOutNames;
99         outputTypes["count"] = tempOutNames;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 GetLineageCommand::GetLineageCommand(string option)  {
108         try {
109                 abort = false; calledHelp = false;   
110                                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
114                 
115                 else {
116                         vector<string> myArray = setParameters();
117                         
118                         OptionParser parser(option);
119                         map<string,string> parameters = parser.getParameters();
120                         
121                         ValidParameters validParameter;
122                         map<string,string>::iterator it;
123                         
124                         //check to make sure all parameters are valid for command
125                         for (it = parameters.begin(); it != parameters.end(); it++) { 
126                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
127                         }
128                         
129                         //initialize outputTypes
130                         vector<string> tempOutNames;
131                         outputTypes["fasta"] = tempOutNames;
132                         outputTypes["taxonomy"] = tempOutNames;
133                         outputTypes["name"] = tempOutNames;
134                         outputTypes["group"] = tempOutNames;
135                         outputTypes["alignreport"] = tempOutNames;
136                         outputTypes["list"] = tempOutNames;
137             outputTypes["count"] = tempOutNames;
138
139                         //if the user changes the output directory command factory will send this info to us in the output parameter 
140                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
141                         
142                         //if the user changes the input directory command factory will send this info to us in the output parameter 
143                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
144                         if (inputDir == "not found"){   inputDir = "";          }
145                         else {
146                                 string path;
147                                 it = parameters.find("alignreport");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
153                                 }
154                                 
155                                 it = parameters.find("fasta");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
161                                 }
162                                 
163                                 it = parameters.find("list");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
169                                 }
170                                 
171                                 it = parameters.find("name");
172                                 //user has given a template file
173                                 if(it != parameters.end()){ 
174                                         path = m->hasPath(it->second);
175                                         //if the user has not given a path then, add inputdir. else leave path alone.
176                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
177                                 }
178                                 
179                                 it = parameters.find("group");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
185                                 }
186                                 
187                                 it = parameters.find("taxonomy");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
193                                 }
194                 
195                 it = parameters.find("count");
196                                 //user has given a template file
197                                 if(it != parameters.end()){ 
198                                         path = m->hasPath(it->second);
199                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
201                                 }
202                         }
203
204                         
205                         //check for required parameters                 
206                         fastafile = validParameter.validFile(parameters, "fasta", true);
207                         if (fastafile == "not open") { fastafile = ""; abort = true; }
208                         else if (fastafile == "not found") {  fastafile = "";  }
209                         else { m->setFastaFile(fastafile); }
210                         
211                         namefile = validParameter.validFile(parameters, "name", true);
212                         if (namefile == "not open") { namefile = ""; abort = true; }
213                         else if (namefile == "not found") {  namefile = "";  }  
214                         else { m->setNameFile(namefile); }
215                         
216                         groupfile = validParameter.validFile(parameters, "group", true);
217                         if (groupfile == "not open") { abort = true; }
218                         else if (groupfile == "not found") {  groupfile = "";  }        
219                         else { m->setGroupFile(groupfile); }
220                         
221                         alignfile = validParameter.validFile(parameters, "alignreport", true);
222                         if (alignfile == "not open") { abort = true; }
223                         else if (alignfile == "not found") {  alignfile = "";  }
224                         
225                         listfile = validParameter.validFile(parameters, "list", true);
226                         if (listfile == "not open") { abort = true; }
227                         else if (listfile == "not found") {  listfile = "";  }
228                         else { m->setListFile(listfile); }
229                         
230                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
231                         if (taxfile == "not open") { taxfile = ""; abort = true; }
232                         else if (taxfile == "not found") {                              
233                                 taxfile = m->getTaxonomyFile(); 
234                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
235                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
236                         }else { m->setTaxonomyFile(taxfile); }
237                         
238                         string usedDups = "true";
239                         string temp = validParameter.validFile(parameters, "dups", false);      
240                         if (temp == "not found") { 
241                                 if (namefile != "") {  temp = "true";                                   }
242                                 else                            {  temp = "false"; usedDups = "";       }
243                         }
244                         dups = m->isTrue(temp);
245             
246             countfile = validParameter.validFile(parameters, "count", true);
247             if (countfile == "not open") { countfile = ""; abort = true; }
248             else if (countfile == "not found") { countfile = "";  }     
249             else { m->setCountTableFile(countfile); }
250             
251             if ((namefile != "") && (countfile != "")) {
252                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
253             }
254             
255             if ((groupfile != "") && (countfile != "")) {
256                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
257             }
258                         
259                         taxons = validParameter.validFile(parameters, "taxon", false);  
260                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
261                         else { 
262                                 //rip off quotes
263                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
264                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
265                         }
266                         m->splitAtChar(taxons, listOfTaxons, '-');
267                         
268                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
269                 
270             if (countfile == "") {
271                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
272                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
273                     parser.getNameFile(files);
274                 }
275             }
276                 }
277
278         }
279         catch(exception& e) {
280                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
281                 exit(1);
282         }
283 }
284 //**********************************************************************************************************************
285
286 int GetLineageCommand::execute(){
287         try {
288                 
289                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
290                 
291                 if (m->control_pressed) { return 0; }
292         
293         if (countfile != "") {
294             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
295                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
296             }
297         }
298                 
299                 //read through the correct file and output lines you want to keep
300                 if (taxfile != "")                      {               readTax();              }  //fills the set of names to get
301                 if (namefile != "")                     {               readName();             }
302                 if (fastafile != "")            {               readFasta();    }
303         if (countfile != "")            {               readCount();    }
304                 if (groupfile != "")            {               readGroup();    }
305                 if (alignfile != "")            {               readAlign();    }
306                 if (listfile != "")                     {               readList();             }
307                 
308                 
309                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
310                 
311                 if (outputNames.size() != 0) {
312                         m->mothurOutEndLine();
313                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
314                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
315                         m->mothurOutEndLine();
316                         
317                         //set fasta file as new current fastafile
318                         string current = "";
319                         itTypes = outputTypes.find("fasta");
320                         if (itTypes != outputTypes.end()) {
321                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
322                         }
323                         
324                         itTypes = outputTypes.find("name");
325                         if (itTypes != outputTypes.end()) {
326                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
327                         }
328                         
329                         itTypes = outputTypes.find("group");
330                         if (itTypes != outputTypes.end()) {
331                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
332                         }
333                         
334                         itTypes = outputTypes.find("list");
335                         if (itTypes != outputTypes.end()) {
336                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
337                         }
338                         
339                         itTypes = outputTypes.find("taxonomy");
340                         if (itTypes != outputTypes.end()) {
341                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
342                         }
343                         
344             itTypes = outputTypes.find("count");
345                         if (itTypes != outputTypes.end()) {
346                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
347                         }
348                 }
349                 
350                 return 0;               
351         }
352
353         catch(exception& e) {
354                 m->errorOut(e, "GetLineageCommand", "execute");
355                 exit(1);
356         }
357 }
358
359 //**********************************************************************************************************************
360 int GetLineageCommand::readFasta(){
361         try {
362                 string thisOutputDir = outputDir;
363                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
364                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
365                 ofstream out;
366                 m->openOutputFile(outputFileName, out);
367                 
368                 
369                 ifstream in;
370                 m->openInputFile(fastafile, in);
371                 string name;
372                 
373                 bool wroteSomething = false;
374                 
375                 while(!in.eof()){
376                 
377                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
378                         
379                         Sequence currSeq(in);
380                         name = currSeq.getName();
381                         
382                         if (name != "") {
383                                 //if this name is in the accnos file
384                                 if (names.count(name) != 0) {
385                                         wroteSomething = true;
386                                         
387                                         currSeq.printSequence(out);
388                                 }
389                         }
390                         m->gobble(in);
391                 }
392                 in.close();     
393                 out.close();
394                 
395                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
396                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
397                 
398                 return 0;
399
400         }
401         catch(exception& e) {
402                 m->errorOut(e, "GetLineageCommand", "readFasta");
403                 exit(1);
404         }
405 }
406 //**********************************************************************************************************************
407 int GetLineageCommand::readCount(){
408         try {
409                 string thisOutputDir = outputDir;
410                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
411                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
412                 
413                 ofstream out;
414                 m->openOutputFile(outputFileName, out);
415                 
416                 ifstream in;
417                 m->openInputFile(countfile, in);
418                 
419                 bool wroteSomething = false;
420                 
421         string headers = m->getline(in); m->gobble(in);
422         out << headers << endl;
423         
424         string name, rest; int thisTotal;
425         while (!in.eof()) {
426             
427             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
428             
429             in >> name; m->gobble(in); 
430             in >> thisTotal; m->gobble(in);
431             rest = m->getline(in); m->gobble(in);
432             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
433             
434             if (names.count(name) != 0) {
435                 out << name << '\t' << thisTotal << '\t' << rest << endl;
436                 wroteSomething = true;
437             }
438         }
439         in.close();
440                 out.close();
441                 
442                 if (wroteSomething == false) {  m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
443                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
444                        
445                 return 0;
446         }
447         catch(exception& e) {
448                 m->errorOut(e, "GetLineageCommand", "readCount");
449                 exit(1);
450         }
451 }
452 //**********************************************************************************************************************
453 int GetLineageCommand::readList(){
454         try {
455                 string thisOutputDir = outputDir;
456                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
457                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
458                 ofstream out;
459                 m->openOutputFile(outputFileName, out);
460                 
461                 ifstream in;
462                 m->openInputFile(listfile, in);
463                 
464                 bool wroteSomething = false;
465                 
466                 while(!in.eof()){
467                         
468                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
469
470                         //read in list vector
471                         ListVector list(in);
472                         
473                         //make a new list vector
474                         ListVector newList;
475                         newList.setLabel(list.getLabel());
476                         
477                         //for each bin
478                         for (int i = 0; i < list.getNumBins(); i++) {
479                         
480                                 //parse out names that are in accnos file
481                                 string binnames = list.get(i);
482                                 
483                                 string newNames = "";
484                                 while (binnames.find_first_of(',') != -1) { 
485                                         string name = binnames.substr(0,binnames.find_first_of(','));
486                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
487                                         
488                                         //if that name is in the .accnos file, add it
489                                         if (names.count(name) != 0) {  newNames += name + ",";  }
490                                 }
491                         
492                                 //get last name
493                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  }
494
495                                 //if there are names in this bin add to new list
496                                 if (newNames != "") { 
497                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
498                                         newList.push_back(newNames);    
499                                 }
500                         }
501                                 
502                         //print new listvector
503                         if (newList.getNumBins() != 0) {
504                                 wroteSomething = true;
505                                 newList.print(out);
506                         }
507                         
508                         m->gobble(in);
509                 }
510                 in.close();     
511                 out.close();
512                 
513                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
514                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
515                 
516                 return 0;
517
518         }
519         catch(exception& e) {
520                 m->errorOut(e, "GetLineageCommand", "readList");
521                 exit(1);
522         }
523 }
524 //**********************************************************************************************************************
525 int GetLineageCommand::readName(){
526         try {
527                 string thisOutputDir = outputDir;
528                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
529                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
530                 ofstream out;
531                 m->openOutputFile(outputFileName, out);
532                 
533
534                 ifstream in;
535                 m->openInputFile(namefile, in);
536                 string name, firstCol, secondCol;
537                 
538                 bool wroteSomething = false;
539                 
540                 
541                 while(!in.eof()){
542                 
543                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
544
545                         in >> firstCol;                         
546                         in >> secondCol;
547                         
548                         string hold = "";
549                         if (dups) { hold = secondCol; }
550                         
551                         vector<string> parsedNames;
552                         m->splitAtComma(secondCol, parsedNames);
553                         
554                         vector<string> validSecond;
555                         for (int i = 0; i < parsedNames.size(); i++) {
556                                 if (names.count(parsedNames[i]) != 0) {
557                                         validSecond.push_back(parsedNames[i]);
558                                 }
559                         }
560
561                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
562                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
563                                 out << firstCol << '\t' << hold << endl;
564                                 wroteSomething = true;
565                         }else {
566                                 //if the name in the first column is in the set then print it and any other names in second column also in set
567                                 if (names.count(firstCol) != 0) {
568                                 
569                                         wroteSomething = true;
570                                         
571                                         out << firstCol << '\t';
572                                         
573                                         //you know you have at least one valid second since first column is valid
574                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
575                                         out << validSecond[validSecond.size()-1] << endl;
576                                         
577                                 
578                                 //make first name in set you come to first column and then add the remaining names to second column
579                                 }else {
580                                         //you want part of this row
581                                         if (validSecond.size() != 0) {
582                                         
583                                                 wroteSomething = true;
584                                                 
585                                                 out << validSecond[0] << '\t';
586                                         
587                                                 //you know you have at least one valid second since first column is valid
588                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
589                                                 out << validSecond[validSecond.size()-1] << endl;
590                                         }
591                                 }
592                         }
593                         m->gobble(in);
594                 }
595                 in.close();
596                 out.close();
597                 
598                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
599                 outputNames.push_back(outputFileName);  outputTypes["name"].push_back(outputFileName);
600                 
601                 return 0;
602                 
603         }
604         catch(exception& e) {
605                 m->errorOut(e, "GetLineageCommand", "readName");
606                 exit(1);
607         }
608 }
609
610 //**********************************************************************************************************************
611 int GetLineageCommand::readGroup(){
612         try {
613                 string thisOutputDir = outputDir;
614                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
615                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
616                 ofstream out;
617                 m->openOutputFile(outputFileName, out);
618                 
619
620                 ifstream in;
621                 m->openInputFile(groupfile, in);
622                 string name, group;
623                 
624                 bool wroteSomething = false;
625                 
626                 while(!in.eof()){
627
628                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
629
630
631                         in >> name;                             //read from first column
632                         in >> group;                    //read from second column
633                         
634                         //if this name is in the accnos file
635                         if (names.count(name) != 0) {
636                                 wroteSomething = true;
637                                 
638                                 out << name << '\t' << group << endl;
639                         }
640                                         
641                         m->gobble(in);
642                 }
643                 in.close();
644                 out.close();
645                 
646                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
647                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
648                 
649                 return 0;
650
651         }
652         catch(exception& e) {
653                 m->errorOut(e, "GetLineageCommand", "readGroup");
654                 exit(1);
655         }
656 }
657 //**********************************************************************************************************************
658 int GetLineageCommand::readTax(){
659         try {
660                 string thisOutputDir = outputDir;
661                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
662                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
663                 ofstream out;
664                 m->openOutputFile(outputFileName, out);
665                 
666                 ifstream in;
667                 m->openInputFile(taxfile, in);
668                 string name, tax;
669                 
670                 //bool wroteSomething = false;
671                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
672                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
673                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
674                 
675                 for (int i = 0; i < listOfTaxons.size(); i++) {
676                         noConfidenceTaxons[i] = listOfTaxons[i];
677                         int hasConPos = listOfTaxons[i].find_first_of('(');
678                         if (hasConPos != string::npos) {  
679                                 taxonsHasConfidence[i] = true; 
680                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
681                                 noConfidenceTaxons[i] = listOfTaxons[i];
682                                 m->removeConfidences(noConfidenceTaxons[i]);
683                         }
684                 }
685                 
686                 
687                 while(!in.eof()){
688
689                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
690
691                         in >> name;                             //read from first column
692                         in >> tax;                      //read from second column
693                         
694                         for (int j = 0; j < listOfTaxons.size(); j++) {
695                                                         
696                                 string newtax = tax;
697                         
698                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
699                                 if (!taxonsHasConfidence[j]) {
700                                         int hasConfidences = tax.find_first_of('(');
701                                         if (hasConfidences != string::npos) { 
702                                                 newtax = tax;
703                                                 m->removeConfidences(newtax);
704                                         }
705                                 
706                                         int pos = newtax.find(noConfidenceTaxons[j]);
707                                 
708                                         if (pos != string::npos) { //this sequence contains the taxon the user wants
709                                                 names.insert(name); 
710                                                 out << name << '\t' << tax << endl;
711                                                 //since you belong to at least one of the taxons we want you are included so no need to search for other
712                                                 break;
713                                         }
714                                 }else{//if listOfTaxons[i] has them and you don't them remove taxons
715                                         int hasConfidences = tax.find_first_of('(');
716                                         if (hasConfidences == string::npos) { 
717                                         
718                                                 int pos = newtax.find(noConfidenceTaxons[j]);
719                                         
720                                                 if (pos != string::npos) { //this sequence contains the taxon the user wants
721                                                         names.insert(name);
722                                                         out << name << '\t' << tax << endl;
723                                                         //since you belong to at least one of the taxons we want you are included so no need to search for other
724                                                         break;
725                                                 }
726                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
727                                                 //first remove confidences from both and see if the taxonomy exists
728                                         
729                                                 string noNewTax = tax;
730                                                 int hasConfidences = tax.find_first_of('(');
731                                                 if (hasConfidences != string::npos) { 
732                                                         noNewTax = tax;
733                                                         m->removeConfidences(noNewTax);
734                                                 }
735                                         
736                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
737                                         
738                                                 if (pos != string::npos) { //if yes, then are the confidences okay
739                                                 
740                                                         bool good = true;
741                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
742                                                 
743                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
744                                                         //we want to "line them up", so we will find the the index where the searchstring starts
745                                                         int index = 0;
746                                                         for (int i = 0; i < usersTaxon.size(); i++) {
747                                                         
748                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
749                                                                         index = i;  
750                                                                         int spot = 0;
751                                                                         bool goodspot = true;
752                                                                         //is this really the start, or are we dealing with a taxon of the same name?
753                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
754                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
755                                                                                 else { spot++; }
756                                                                         }
757                                                                 
758                                                                         if (goodspot) { break; }
759                                                                 }
760                                                         }
761                                                 
762                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
763                                                         
764                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
765                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
766                                                                                 good = false;
767                                                                                 break;
768                                                                         }
769                                                                 }else {
770                                                                         good = false;
771                                                                         break;
772                                                                 }
773                                                         }
774                                                 
775                                                         //passed the test so add you
776                                                         if (good) {
777                                                                 names.insert(name);
778                                                                 out << name << '\t' << tax << endl;
779                                                                 break;
780                                                         }
781                                                 }
782                                         }
783                                 }
784                         
785                         }
786                         
787                         m->gobble(in);
788                 }
789                 in.close();
790                 out.close();
791                 
792                 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
793                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
794                         
795                 return 0;
796
797         }
798         catch(exception& e) {
799                 m->errorOut(e, "GetLineageCommand", "readTax");
800                 exit(1);
801         }
802 }
803 /**************************************************************************************************/
804 vector< map<string, float> > GetLineageCommand::getTaxons(string tax) {
805         try {
806                 
807                 vector< map<string, float> > t;
808                 string taxon = "";
809                 int taxLength = tax.length();
810                 for(int i=0;i<taxLength;i++){
811                         if(tax[i] == ';'){
812                 
813                                 int openParen = taxon.find_first_of('(');
814                                 int closeParen = taxon.find_last_of(')');
815                                 
816                                 string newtaxon, confidence;
817                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
818                                         newtaxon = taxon.substr(0, openParen); //rip off confidence
819                                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
820                                 }else{
821                                         newtaxon = taxon;
822                                         confidence = "0";
823                                 } 
824                                 float con = 0;
825                                 convert(confidence, con);
826                                 
827                                 map<string, float> temp;
828                                 temp[newtaxon] = con;
829                                 t.push_back(temp);
830                                 
831                                 taxon = "";
832                         }
833                         else{
834                                 taxon += tax[i];
835                         }
836                 }
837                 
838                 return t;
839         }
840         catch(exception& e) {
841                 m->errorOut(e, "GetLineageCommand", "getTaxons");
842                 exit(1);
843         }
844 }
845 //**********************************************************************************************************************
846 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
847 int GetLineageCommand::readAlign(){
848         try {
849                 string thisOutputDir = outputDir;
850                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
851                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
852                 ofstream out;
853                 m->openOutputFile(outputFileName, out);
854                 
855
856                 ifstream in;
857                 m->openInputFile(alignfile, in);
858                 string name, junk;
859                 
860                 bool wroteSomething = false;
861                 
862                 //read column headers
863                 for (int i = 0; i < 16; i++) {  
864                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
865                         else                    {       break;                  }
866                 }
867                 out << endl;
868                 
869                 while(!in.eof()){
870                 
871                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
872
873
874                         in >> name;                             //read from first column
875                         
876                         //if this name is in the accnos file
877                         if (names.count(name) != 0) {
878                                 wroteSomething = true;
879                                 
880                                 out << name << '\t';
881                                 
882                                 //read rest
883                                 for (int i = 0; i < 15; i++) {  
884                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
885                                         else                    {       break;                  }
886                                 }
887                                 out << endl;
888                                 
889                         }else {//still read just don't do anything with it
890                                 //read rest
891                                 for (int i = 0; i < 15; i++) {  
892                                         if (!in.eof())  {       in >> junk;             }
893                                         else                    {       break;                  }
894                                 }
895                         }
896                         
897                         m->gobble(in);
898                 }
899                 in.close();
900                 out.close();
901                 
902                 if (wroteSomething == false) { m->mothurOut("Your file contains does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
903                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
904                 
905                 return 0;
906                 
907         }
908         catch(exception& e) {
909                 m->errorOut(e, "GetLineageCommand", "readAlign");
910                 exit(1);
911         }
912 }
913 //**********************************************************************************************************************
914
915
916
917