]> git.donarmstrong.com Git - mothur.git/blob - getlineagecommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / getlineagecommand.cpp
1 /*
2  *  getlineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getlineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> GetLineageCommand::setParameters(){      
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "",false,true); parameters.push_back(ptaxon);
24                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "GetLineageCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string GetLineageCommand::getHelpString(){      
39         try {
40                 string helpString = "";
41                 helpString += "The get.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n";
42                 helpString += "It outputs a file containing only the sequences from the taxonomy file that are from the taxon requested.\n";
43                 helpString += "The get.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
44                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
45                 helpString += "The taxon parameter allows you to select the taxons you would like to get and is required.\n";
46                 helpString += "You may enter your taxons with confidence scores, doing so will get only those sequences that belong to the taxonomy and whose cofidence scores is above the scores you give.\n";
47                 helpString += "If they belong to the taxonomy and have confidences below those you provide the sequence will not be selected.\n";
48                 helpString += "The get.lineage command should be in the following format: get.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
49                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
50                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
51                 helpString += "Example get.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
52                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
53                 return helpString;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "GetLineageCommand", "getHelpString");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 string GetLineageCommand::getOutputFileNameTag(string type, string inputName=""){       
62         try {
63         string outputFileName = "";
64                 map<string, vector<string> >::iterator it;
65         
66         //is this a type this command creates
67         it = outputTypes.find(type);
68         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
69         else {
70             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
71             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
72             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
73             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
74             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
75             else if (type == "alignreport") {   outputFileName =  "pick.align.report";   }
76             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
77         }
78         return outputFileName;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "GetLineageCommand", "getOutputFileNameTag");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86 GetLineageCommand::GetLineageCommand(){ 
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["fasta"] = tempOutNames;
92                 outputTypes["taxonomy"] = tempOutNames;
93                 outputTypes["name"] = tempOutNames;
94                 outputTypes["group"] = tempOutNames;
95                 outputTypes["alignreport"] = tempOutNames;
96                 outputTypes["list"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 GetLineageCommand::GetLineageCommand(string option)  {
105         try {
106                 abort = false; calledHelp = false;   
107                                 
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         
118                         ValidParameters validParameter;
119                         map<string,string>::iterator it;
120                         
121                         //check to make sure all parameters are valid for command
122                         for (it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["fasta"] = tempOutNames;
129                         outputTypes["taxonomy"] = tempOutNames;
130                         outputTypes["name"] = tempOutNames;
131                         outputTypes["group"] = tempOutNames;
132                         outputTypes["alignreport"] = tempOutNames;
133                         outputTypes["list"] = tempOutNames;
134
135                         //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
137                         
138                         //if the user changes the input directory command factory will send this info to us in the output parameter 
139                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
140                         if (inputDir == "not found"){   inputDir = "";          }
141                         else {
142                                 string path;
143                                 it = parameters.find("alignreport");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
149                                 }
150                                 
151                                 it = parameters.find("fasta");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
157                                 }
158                                 
159                                 it = parameters.find("list");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
165                                 }
166                                 
167                                 it = parameters.find("name");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
173                                 }
174                                 
175                                 it = parameters.find("group");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
181                                 }
182                                 
183                                 it = parameters.find("taxonomy");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
189                                 }
190                         }
191
192                         
193                         //check for required parameters                 
194                         fastafile = validParameter.validFile(parameters, "fasta", true);
195                         if (fastafile == "not open") { fastafile = ""; abort = true; }
196                         else if (fastafile == "not found") {  fastafile = "";  }
197                         else { m->setFastaFile(fastafile); }
198                         
199                         namefile = validParameter.validFile(parameters, "name", true);
200                         if (namefile == "not open") { namefile = ""; abort = true; }
201                         else if (namefile == "not found") {  namefile = "";  }  
202                         else { m->setNameFile(namefile); }
203                         
204                         groupfile = validParameter.validFile(parameters, "group", true);
205                         if (groupfile == "not open") { abort = true; }
206                         else if (groupfile == "not found") {  groupfile = "";  }        
207                         else { m->setGroupFile(groupfile); }
208                         
209                         alignfile = validParameter.validFile(parameters, "alignreport", true);
210                         if (alignfile == "not open") { abort = true; }
211                         else if (alignfile == "not found") {  alignfile = "";  }
212                         
213                         listfile = validParameter.validFile(parameters, "list", true);
214                         if (listfile == "not open") { abort = true; }
215                         else if (listfile == "not found") {  listfile = "";  }
216                         else { m->setListFile(listfile); }
217                         
218                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
219                         if (taxfile == "not open") { taxfile = ""; abort = true; }
220                         else if (taxfile == "not found") {                              
221                                 taxfile = m->getTaxonomyFile(); 
222                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
223                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
224                         }else { m->setTaxonomyFile(taxfile); }
225                         
226                         string usedDups = "true";
227                         string temp = validParameter.validFile(parameters, "dups", false);      
228                         if (temp == "not found") { 
229                                 if (namefile != "") {  temp = "true";                                   }
230                                 else                            {  temp = "false"; usedDups = "";       }
231                         }
232                         dups = m->isTrue(temp);
233                         
234                         taxons = validParameter.validFile(parameters, "taxon", false);  
235                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
236                         else { 
237                                 //rip off quotes
238                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
239                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
240                         }
241                         m->splitAtChar(taxons, listOfTaxons, '-');
242                         
243                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
244                 
245                         if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
246                                 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
247                                 parser.getNameFile(files);
248                         }
249                 }
250
251         }
252         catch(exception& e) {
253                 m->errorOut(e, "GetLineageCommand", "GetLineageCommand");
254                 exit(1);
255         }
256 }
257 //**********************************************************************************************************************
258
259 int GetLineageCommand::execute(){
260         try {
261                 
262                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
263                 
264                 if (m->control_pressed) { return 0; }
265                 
266                 //read through the correct file and output lines you want to keep
267                 if (taxfile != "")                      {               readTax();              }  //fills the set of names to get
268                 if (namefile != "")                     {               readName();             }
269                 if (fastafile != "")            {               readFasta();    }
270                 if (groupfile != "")            {               readGroup();    }
271                 if (alignfile != "")            {               readAlign();    }
272                 if (listfile != "")                     {               readList();             }
273                 
274                 
275                 if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);  } return 0; }
276                 
277                 if (outputNames.size() != 0) {
278                         m->mothurOutEndLine();
279                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
280                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
281                         m->mothurOutEndLine();
282                         
283                         //set fasta file as new current fastafile
284                         string current = "";
285                         itTypes = outputTypes.find("fasta");
286                         if (itTypes != outputTypes.end()) {
287                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
288                         }
289                         
290                         itTypes = outputTypes.find("name");
291                         if (itTypes != outputTypes.end()) {
292                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
293                         }
294                         
295                         itTypes = outputTypes.find("group");
296                         if (itTypes != outputTypes.end()) {
297                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
298                         }
299                         
300                         itTypes = outputTypes.find("list");
301                         if (itTypes != outputTypes.end()) {
302                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
303                         }
304                         
305                         itTypes = outputTypes.find("taxonomy");
306                         if (itTypes != outputTypes.end()) {
307                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
308                         }                       
309                 }
310                 
311                 return 0;               
312         }
313
314         catch(exception& e) {
315                 m->errorOut(e, "GetLineageCommand", "execute");
316                 exit(1);
317         }
318 }
319
320 //**********************************************************************************************************************
321 int GetLineageCommand::readFasta(){
322         try {
323                 string thisOutputDir = outputDir;
324                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
325                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
326                 ofstream out;
327                 m->openOutputFile(outputFileName, out);
328                 
329                 
330                 ifstream in;
331                 m->openInputFile(fastafile, in);
332                 string name;
333                 
334                 bool wroteSomething = false;
335                 
336                 while(!in.eof()){
337                 
338                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
339                         
340                         Sequence currSeq(in);
341                         name = currSeq.getName();
342                         
343                         if (name != "") {
344                                 //if this name is in the accnos file
345                                 if (names.count(name) != 0) {
346                                         wroteSomething = true;
347                                         
348                                         currSeq.printSequence(out);
349                                 }
350                         }
351                         m->gobble(in);
352                 }
353                 in.close();     
354                 out.close();
355                 
356                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
357                 outputNames.push_back(outputFileName);  outputTypes["fasta"].push_back(outputFileName);
358                 
359                 return 0;
360
361         }
362         catch(exception& e) {
363                 m->errorOut(e, "GetLineageCommand", "readFasta");
364                 exit(1);
365         }
366 }
367 //**********************************************************************************************************************
368 int GetLineageCommand::readList(){
369         try {
370                 string thisOutputDir = outputDir;
371                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
372                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
373                 ofstream out;
374                 m->openOutputFile(outputFileName, out);
375                 
376                 ifstream in;
377                 m->openInputFile(listfile, in);
378                 
379                 bool wroteSomething = false;
380                 
381                 while(!in.eof()){
382                         
383                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
384
385                         //read in list vector
386                         ListVector list(in);
387                         
388                         //make a new list vector
389                         ListVector newList;
390                         newList.setLabel(list.getLabel());
391                         
392                         //for each bin
393                         for (int i = 0; i < list.getNumBins(); i++) {
394                         
395                                 //parse out names that are in accnos file
396                                 string binnames = list.get(i);
397                                 
398                                 string newNames = "";
399                                 while (binnames.find_first_of(',') != -1) { 
400                                         string name = binnames.substr(0,binnames.find_first_of(','));
401                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
402                                         
403                                         //if that name is in the .accnos file, add it
404                                         if (names.count(name) != 0) {  newNames += name + ",";  }
405                                 }
406                         
407                                 //get last name
408                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  }
409
410                                 //if there are names in this bin add to new list
411                                 if (newNames != "") { 
412                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
413                                         newList.push_back(newNames);    
414                                 }
415                         }
416                                 
417                         //print new listvector
418                         if (newList.getNumBins() != 0) {
419                                 wroteSomething = true;
420                                 newList.print(out);
421                         }
422                         
423                         m->gobble(in);
424                 }
425                 in.close();     
426                 out.close();
427                 
428                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
429                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
430                 
431                 return 0;
432
433         }
434         catch(exception& e) {
435                 m->errorOut(e, "GetLineageCommand", "readList");
436                 exit(1);
437         }
438 }
439 //**********************************************************************************************************************
440 int GetLineageCommand::readName(){
441         try {
442                 string thisOutputDir = outputDir;
443                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
444                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
445                 ofstream out;
446                 m->openOutputFile(outputFileName, out);
447                 
448
449                 ifstream in;
450                 m->openInputFile(namefile, in);
451                 string name, firstCol, secondCol;
452                 
453                 bool wroteSomething = false;
454                 
455                 
456                 while(!in.eof()){
457                 
458                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
459
460                         in >> firstCol;                         
461                         in >> secondCol;
462                         
463                         string hold = "";
464                         if (dups) { hold = secondCol; }
465                         
466                         vector<string> parsedNames;
467                         m->splitAtComma(secondCol, parsedNames);
468                         
469                         vector<string> validSecond;
470                         for (int i = 0; i < parsedNames.size(); i++) {
471                                 if (names.count(parsedNames[i]) != 0) {
472                                         validSecond.push_back(parsedNames[i]);
473                                 }
474                         }
475
476                         if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
477                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
478                                 out << firstCol << '\t' << hold << endl;
479                                 wroteSomething = true;
480                         }else {
481                                 //if the name in the first column is in the set then print it and any other names in second column also in set
482                                 if (names.count(firstCol) != 0) {
483                                 
484                                         wroteSomething = true;
485                                         
486                                         out << firstCol << '\t';
487                                         
488                                         //you know you have at least one valid second since first column is valid
489                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
490                                         out << validSecond[validSecond.size()-1] << endl;
491                                         
492                                 
493                                 //make first name in set you come to first column and then add the remaining names to second column
494                                 }else {
495                                         //you want part of this row
496                                         if (validSecond.size() != 0) {
497                                         
498                                                 wroteSomething = true;
499                                                 
500                                                 out << validSecond[0] << '\t';
501                                         
502                                                 //you know you have at least one valid second since first column is valid
503                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
504                                                 out << validSecond[validSecond.size()-1] << endl;
505                                         }
506                                 }
507                         }
508                         m->gobble(in);
509                 }
510                 in.close();
511                 out.close();
512                 
513                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
514                 outputNames.push_back(outputFileName);  outputTypes["name"].push_back(outputFileName);
515                 
516                 return 0;
517                 
518         }
519         catch(exception& e) {
520                 m->errorOut(e, "GetLineageCommand", "readName");
521                 exit(1);
522         }
523 }
524
525 //**********************************************************************************************************************
526 int GetLineageCommand::readGroup(){
527         try {
528                 string thisOutputDir = outputDir;
529                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
530                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
531                 ofstream out;
532                 m->openOutputFile(outputFileName, out);
533                 
534
535                 ifstream in;
536                 m->openInputFile(groupfile, in);
537                 string name, group;
538                 
539                 bool wroteSomething = false;
540                 
541                 while(!in.eof()){
542
543                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
544
545
546                         in >> name;                             //read from first column
547                         in >> group;                    //read from second column
548                         
549                         //if this name is in the accnos file
550                         if (names.count(name) != 0) {
551                                 wroteSomething = true;
552                                 
553                                 out << name << '\t' << group << endl;
554                         }
555                                         
556                         m->gobble(in);
557                 }
558                 in.close();
559                 out.close();
560                 
561                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
562                 outputNames.push_back(outputFileName);  outputTypes["group"].push_back(outputFileName);
563                 
564                 return 0;
565
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "GetLineageCommand", "readGroup");
569                 exit(1);
570         }
571 }
572 //**********************************************************************************************************************
573 int GetLineageCommand::readTax(){
574         try {
575                 string thisOutputDir = outputDir;
576                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
577                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
578                 ofstream out;
579                 m->openOutputFile(outputFileName, out);
580                 
581                 ifstream in;
582                 m->openInputFile(taxfile, in);
583                 string name, tax;
584                 
585                 //bool wroteSomething = false;
586                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
587                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
588                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
589                 
590                 for (int i = 0; i < listOfTaxons.size(); i++) {
591                         noConfidenceTaxons[i] = listOfTaxons[i];
592                         int hasConPos = listOfTaxons[i].find_first_of('(');
593                         if (hasConPos != string::npos) {  
594                                 taxonsHasConfidence[i] = true; 
595                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
596                                 noConfidenceTaxons[i] = listOfTaxons[i];
597                                 m->removeConfidences(noConfidenceTaxons[i]);
598                         }
599                 }
600                 
601                 
602                 while(!in.eof()){
603
604                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
605
606                         in >> name;                             //read from first column
607                         in >> tax;                      //read from second column
608                         
609                         for (int j = 0; j < listOfTaxons.size(); j++) {
610                                                         
611                                 string newtax = tax;
612                         
613                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
614                                 if (!taxonsHasConfidence[j]) {
615                                         int hasConfidences = tax.find_first_of('(');
616                                         if (hasConfidences != string::npos) { 
617                                                 newtax = tax;
618                                                 m->removeConfidences(newtax);
619                                         }
620                                 
621                                         int pos = newtax.find(noConfidenceTaxons[j]);
622                                 
623                                         if (pos != string::npos) { //this sequence contains the taxon the user wants
624                                                 names.insert(name); 
625                                                 out << name << '\t' << tax << endl;
626                                                 //since you belong to at least one of the taxons we want you are included so no need to search for other
627                                                 break;
628                                         }
629                                 }else{//if listOfTaxons[i] has them and you don't them remove taxons
630                                         int hasConfidences = tax.find_first_of('(');
631                                         if (hasConfidences == string::npos) { 
632                                         
633                                                 int pos = newtax.find(noConfidenceTaxons[j]);
634                                         
635                                                 if (pos != string::npos) { //this sequence contains the taxon the user wants
636                                                         names.insert(name);
637                                                         out << name << '\t' << tax << endl;
638                                                         //since you belong to at least one of the taxons we want you are included so no need to search for other
639                                                         break;
640                                                 }
641                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
642                                                 //first remove confidences from both and see if the taxonomy exists
643                                         
644                                                 string noNewTax = tax;
645                                                 int hasConfidences = tax.find_first_of('(');
646                                                 if (hasConfidences != string::npos) { 
647                                                         noNewTax = tax;
648                                                         m->removeConfidences(noNewTax);
649                                                 }
650                                         
651                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
652                                         
653                                                 if (pos != string::npos) { //if yes, then are the confidences okay
654                                                 
655                                                         bool good = true;
656                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
657                                                 
658                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
659                                                         //we want to "line them up", so we will find the the index where the searchstring starts
660                                                         int index = 0;
661                                                         for (int i = 0; i < usersTaxon.size(); i++) {
662                                                         
663                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
664                                                                         index = i;  
665                                                                         int spot = 0;
666                                                                         bool goodspot = true;
667                                                                         //is this really the start, or are we dealing with a taxon of the same name?
668                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
669                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
670                                                                                 else { spot++; }
671                                                                         }
672                                                                 
673                                                                         if (goodspot) { break; }
674                                                                 }
675                                                         }
676                                                 
677                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
678                                                         
679                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
680                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
681                                                                                 good = false;
682                                                                                 break;
683                                                                         }
684                                                                 }else {
685                                                                         good = false;
686                                                                         break;
687                                                                 }
688                                                         }
689                                                 
690                                                         //passed the test so add you
691                                                         if (good) {
692                                                                 names.insert(name);
693                                                                 out << name << '\t' << tax << endl;
694                                                                 break;
695                                                         }
696                                                 }
697                                         }
698                                 }
699                         
700                         }
701                         
702                         m->gobble(in);
703                 }
704                 in.close();
705                 out.close();
706                 
707                 if (names.size() == 0) { m->mothurOut("Your taxonomy file does not contain any sequences from " + taxons + "."); m->mothurOutEndLine();  }
708                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
709                         
710                 return 0;
711
712         }
713         catch(exception& e) {
714                 m->errorOut(e, "GetLineageCommand", "readTax");
715                 exit(1);
716         }
717 }
718 /**************************************************************************************************/
719 vector< map<string, float> > GetLineageCommand::getTaxons(string tax) {
720         try {
721                 
722                 vector< map<string, float> > t;
723                 string taxon = "";
724                 int taxLength = tax.length();
725                 for(int i=0;i<taxLength;i++){
726                         if(tax[i] == ';'){
727                 
728                                 int openParen = taxon.find_first_of('(');
729                                 int closeParen = taxon.find_last_of(')');
730                                 
731                                 string newtaxon, confidence;
732                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
733                                         newtaxon = taxon.substr(0, openParen); //rip off confidence
734                                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
735                                 }else{
736                                         newtaxon = taxon;
737                                         confidence = "0";
738                                 } 
739                                 float con = 0;
740                                 convert(confidence, con);
741                                 
742                                 map<string, float> temp;
743                                 temp[newtaxon] = con;
744                                 t.push_back(temp);
745                                 
746                                 taxon = "";
747                         }
748                         else{
749                                 taxon += tax[i];
750                         }
751                 }
752                 
753                 return t;
754         }
755         catch(exception& e) {
756                 m->errorOut(e, "GetLineageCommand", "getTaxons");
757                 exit(1);
758         }
759 }
760 //**********************************************************************************************************************
761 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
762 int GetLineageCommand::readAlign(){
763         try {
764                 string thisOutputDir = outputDir;
765                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
766                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
767                 ofstream out;
768                 m->openOutputFile(outputFileName, out);
769                 
770
771                 ifstream in;
772                 m->openInputFile(alignfile, in);
773                 string name, junk;
774                 
775                 bool wroteSomething = false;
776                 
777                 //read column headers
778                 for (int i = 0; i < 16; i++) {  
779                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
780                         else                    {       break;                  }
781                 }
782                 out << endl;
783                 
784                 while(!in.eof()){
785                 
786                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
787
788
789                         in >> name;                             //read from first column
790                         
791                         //if this name is in the accnos file
792                         if (names.count(name) != 0) {
793                                 wroteSomething = true;
794                                 
795                                 out << name << '\t';
796                                 
797                                 //read rest
798                                 for (int i = 0; i < 15; i++) {  
799                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
800                                         else                    {       break;                  }
801                                 }
802                                 out << endl;
803                                 
804                         }else {//still read just don't do anything with it
805                                 //read rest
806                                 for (int i = 0; i < 15; i++) {  
807                                         if (!in.eof())  {       in >> junk;             }
808                                         else                    {       break;                  }
809                                 }
810                         }
811                         
812                         m->gobble(in);
813                 }
814                 in.close();
815                 out.close();
816                 
817                 if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
818                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
819                 
820                 return 0;
821                 
822         }
823         catch(exception& e) {
824                 m->errorOut(e, "GetLineageCommand", "readAlign");
825                 exit(1);
826         }
827 }
828 //**********************************************************************************************************************
829
830
831
832