]> git.donarmstrong.com Git - mothur.git/blob - removelineagecommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / removelineagecommand.cpp
1 /*
2  *  removelineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> RemoveLineageCommand::setParameters(){   
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
21                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
22                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
23                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "",false,true); parameters.push_back(ptaxon);
24                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "RemoveLineageCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string RemoveLineageCommand::getHelpString(){   
39         try {
40                 string helpString = "";
41                 helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n";
42                 helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
43                 helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
44                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
45                 helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
46                 helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
47                 helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n";
48                 helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
49                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
50                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
51                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
52                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
53                 return helpString;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "RemoveLineageCommand", "getHelpString");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 string RemoveLineageCommand::getOutputFileNameTag(string type, string inputName=""){    
62         try {
63         string outputFileName = "";
64                 map<string, vector<string> >::iterator it;
65         
66         //is this a type this command creates
67         it = outputTypes.find(type);
68         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
69         else {
70             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
71             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
72             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
73             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
74             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
75             else if (type == "alignreport")      {   outputFileName =  "pick.align.report";   }
76             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
77         }
78         return outputFileName;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "RemoveLineageCommand", "getOutputFileNameTag");
82                 exit(1);
83         }
84 }
85 //**********************************************************************************************************************
86 RemoveLineageCommand::RemoveLineageCommand(){   
87         try {
88                 abort = true; calledHelp = true; 
89                 setParameters();
90                 vector<string> tempOutNames;
91                 outputTypes["fasta"] = tempOutNames;
92                 outputTypes["taxonomy"] = tempOutNames;
93                 outputTypes["name"] = tempOutNames;
94                 outputTypes["group"] = tempOutNames;
95                 outputTypes["alignreport"] = tempOutNames;
96                 outputTypes["list"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 RemoveLineageCommand::RemoveLineageCommand(string option)  {
105         try {
106                 abort = false; calledHelp = false;   
107                                 
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();       
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         
118                         ValidParameters validParameter;
119                         map<string,string>::iterator it;
120                         
121                         //check to make sure all parameters are valid for command
122                         for (it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["fasta"] = tempOutNames;
129                         outputTypes["taxonomy"] = tempOutNames;
130                         outputTypes["name"] = tempOutNames;
131                         outputTypes["group"] = tempOutNames;
132                         outputTypes["alignreport"] = tempOutNames;
133                         outputTypes["list"] = tempOutNames;
134                         
135                         //if the user changes the output directory command factory will send this info to us in the output parameter 
136                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
137                         
138                         //if the user changes the input directory command factory will send this info to us in the output parameter 
139                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
140                         if (inputDir == "not found"){   inputDir = "";          }
141                         else {
142                                 string path;
143                                 it = parameters.find("alignreport");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
149                                 }
150                                 
151                                 it = parameters.find("fasta");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
157                                 }
158                                 
159                                 it = parameters.find("list");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
165                                 }
166                                 
167                                 it = parameters.find("name");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
173                                 }
174                                 
175                                 it = parameters.find("group");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
181                                 }
182                                 
183                                 it = parameters.find("taxonomy");
184                                 //user has given a template file
185                                 if(it != parameters.end()){ 
186                                         path = m->hasPath(it->second);
187                                         //if the user has not given a path then, add inputdir. else leave path alone.
188                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
189                                 }
190                         }
191
192                         
193                         //check for required parameters                 
194                         fastafile = validParameter.validFile(parameters, "fasta", true);
195                         if (fastafile == "not open") { fastafile = ""; abort = true; }
196                         else if (fastafile == "not found") {  fastafile = "";  }        
197                         else { m->setFastaFile(fastafile); }
198                         
199                         namefile = validParameter.validFile(parameters, "name", true);
200                         if (namefile == "not open") { namefile = ""; abort = true; }
201                         else if (namefile == "not found") {  namefile = "";  }  
202                         else { m->setNameFile(namefile); }
203                         
204                         groupfile = validParameter.validFile(parameters, "group", true);
205                         if (groupfile == "not open") { abort = true; }
206                         else if (groupfile == "not found") {  groupfile = "";  }        
207                         else { m->setGroupFile(groupfile); }
208                         
209                         alignfile = validParameter.validFile(parameters, "alignreport", true);
210                         if (alignfile == "not open") { abort = true; }
211                         else if (alignfile == "not found") {  alignfile = "";  }
212                         
213                         listfile = validParameter.validFile(parameters, "list", true);
214                         if (listfile == "not open") { abort = true; }
215                         else if (listfile == "not found") {  listfile = "";  }
216                         else { m->setListFile(listfile); }
217                         
218                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
219                         if (taxfile == "not open") { taxfile = ""; abort = true; }
220                         else if (taxfile == "not found") {              
221                                 taxfile = m->getTaxonomyFile(); 
222                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
223                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
224                         }else { m->setTaxonomyFile(taxfile); }
225                         
226                         string usedDups = "true";
227                         string temp = validParameter.validFile(parameters, "dups", false);      
228                         if (temp == "not found") { 
229                                 if (namefile != "") {  temp = "true";                                   }
230                                 else                            {  temp = "false"; usedDups = "";       }
231                         }
232                         dups = m->isTrue(temp);
233                         
234                         taxons = validParameter.validFile(parameters, "taxon", false);  
235                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
236                         else { 
237                                 //rip off quotes
238                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
239                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
240                         }
241                         m->splitAtChar(taxons, listOfTaxons, '-');
242                         
243                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
244                 
245                         if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
246                         
247                         if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
248                                 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
249                                 parser.getNameFile(files);
250                         }
251                         
252                 }
253
254         }
255         catch(exception& e) {
256                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
257                 exit(1);
258         }
259 }
260 //**********************************************************************************************************************
261
262 int RemoveLineageCommand::execute(){
263         try {
264                 
265                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
266                 
267                 if (m->control_pressed) { return 0; }
268                 
269                 //read through the correct file and output lines you want to keep
270                 if (taxfile != "")                      {               readTax();              }  //fills the set of names to remove
271                 if (namefile != "")                     {               readName();             }
272                 if (fastafile != "")            {               readFasta();    }
273                 if (groupfile != "")            {               readGroup();    }
274                 if (alignfile != "")            {               readAlign();    }
275                 if (listfile != "")                     {               readList();             }
276                 
277                 
278                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
279                 
280                 if (outputNames.size() != 0) {
281                         m->mothurOutEndLine();
282                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
283                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
284                         m->mothurOutEndLine();
285                         
286                         //set fasta file as new current fastafile
287                         string current = "";
288                         itTypes = outputTypes.find("fasta");
289                         if (itTypes != outputTypes.end()) {
290                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
291                         }
292                         
293                         itTypes = outputTypes.find("name");
294                         if (itTypes != outputTypes.end()) {
295                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
296                         }
297                         
298                         itTypes = outputTypes.find("group");
299                         if (itTypes != outputTypes.end()) {
300                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
301                         }
302                         
303                         itTypes = outputTypes.find("list");
304                         if (itTypes != outputTypes.end()) {
305                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
306                         }
307                         
308                         itTypes = outputTypes.find("taxonomy");
309                         if (itTypes != outputTypes.end()) {
310                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
311                         }
312                 }
313                 
314                 return 0;               
315         }
316
317         catch(exception& e) {
318                 m->errorOut(e, "RemoveLineageCommand", "execute");
319                 exit(1);
320         }
321 }
322
323 //**********************************************************************************************************************
324 int RemoveLineageCommand::readFasta(){
325         try {
326                 string thisOutputDir = outputDir;
327                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
328                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);         
329                 ofstream out;
330                 m->openOutputFile(outputFileName, out);
331                 
332                 ifstream in;
333                 m->openInputFile(fastafile, in);
334                 string name;
335                 
336                 bool wroteSomething = false;
337                 
338                 while(!in.eof()){
339                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
340                         
341                         Sequence currSeq(in);
342                         name = currSeq.getName();
343                         
344                         if (name != "") {
345                                 //if this name is in the accnos file
346                                 if (names.count(name) == 0) {
347                                         wroteSomething = true;
348                                         
349                                         currSeq.printSequence(out);
350                                 }
351                         }
352                         m->gobble(in);
353                 }
354                 in.close();     
355                 out.close();
356                 
357                 if (wroteSomething == false) {  m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
358                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); 
359                 
360                 return 0;
361                 
362         }
363         catch(exception& e) {
364                 m->errorOut(e, "RemoveLineageCommand", "readFasta");
365                 exit(1);
366         }
367 }
368 //**********************************************************************************************************************
369 int RemoveLineageCommand::readList(){
370         try {
371                 string thisOutputDir = outputDir;
372                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
373                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);            
374                 ofstream out;
375                 m->openOutputFile(outputFileName, out);
376                 
377                 ifstream in;
378                 m->openInputFile(listfile, in);
379                 
380                 bool wroteSomething = false;
381                 
382                 while(!in.eof()){
383                         //read in list vector
384                         ListVector list(in);
385                         
386                         //make a new list vector
387                         ListVector newList;
388                         newList.setLabel(list.getLabel());
389                         
390                         //for each bin
391                         for (int i = 0; i < list.getNumBins(); i++) {
392                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
393                         
394                                 //parse out names that are in accnos file
395                                 string binnames = list.get(i);
396                                 
397                                 string newNames = "";
398                                 while (binnames.find_first_of(',') != -1) { 
399                                         string name = binnames.substr(0,binnames.find_first_of(','));
400                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
401                                         
402                                         //if that name is in the .accnos file, add it
403                                         if (names.count(name) == 0) {  newNames += name + ",";  }
404                                 }
405                         
406                                 //get last name
407                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
408
409                                 //if there are names in this bin add to new list
410                                 if (newNames != "") {  
411                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
412                                         newList.push_back(newNames);    
413                                 }
414                         }
415                                 
416                         //print new listvector
417                         if (newList.getNumBins() != 0) {
418                                 wroteSomething = true;
419                                 newList.print(out);
420                         }
421                         
422                         m->gobble(in);
423                 }
424                 in.close();     
425                 out.close();
426                 
427                 if (wroteSomething == false) {  m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
428                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); 
429                                 
430                 return 0;
431
432         }
433         catch(exception& e) {
434                 m->errorOut(e, "RemoveLineageCommand", "readList");
435                 exit(1);
436         }
437 }
438 //**********************************************************************************************************************
439 int RemoveLineageCommand::readName(){
440         try {
441                 string thisOutputDir = outputDir;
442                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
443                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
444                 ofstream out;
445                 m->openOutputFile(outputFileName, out);
446
447                 ifstream in;
448                 m->openInputFile(namefile, in);
449                 string name, firstCol, secondCol;
450                 
451                 bool wroteSomething = false;
452                 
453                 while(!in.eof()){
454                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
455
456                         in >> firstCol;                         
457                         in >> secondCol;                        
458
459                         vector<string> parsedNames;
460                         m->splitAtComma(secondCol, parsedNames);
461                         
462                         vector<string> validSecond;  validSecond.clear();
463                         for (int i = 0; i < parsedNames.size(); i++) {
464                                 if (names.count(parsedNames[i]) == 0) {
465                                         validSecond.push_back(parsedNames[i]);
466                                 }
467                         }
468                         
469                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
470                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
471                         }else {
472                                         //if the name in the first column is in the set then print it and any other names in second column also in set
473                                 if (names.count(firstCol) == 0) {
474                                         
475                                         wroteSomething = true;
476                                         
477                                         out << firstCol << '\t';
478                                         
479                                         //you know you have at least one valid second since first column is valid
480                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
481                                         out << validSecond[validSecond.size()-1] << endl;
482                                         
483                                         //make first name in set you come to first column and then add the remaining names to second column
484                                 }else {
485                                         
486                                         //you want part of this row
487                                         if (validSecond.size() != 0) {
488                                                 
489                                                 wroteSomething = true;
490                                                 
491                                                 out << validSecond[0] << '\t';
492                                                 
493                                                 //you know you have at least one valid second since first column is valid
494                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
495                                                 out << validSecond[validSecond.size()-1] << endl;
496                                         }
497                                 }
498                         }
499                         m->gobble(in);
500                 }
501                 in.close();
502                 out.close();
503
504                 if (wroteSomething == false) {  m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
505                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
506                                 
507                 return 0;
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "RemoveLineageCommand", "readName");
511                 exit(1);
512         }
513 }
514
515 //**********************************************************************************************************************
516 int RemoveLineageCommand::readGroup(){
517         try {
518                 string thisOutputDir = outputDir;
519                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
520                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);         
521                 ofstream out;
522                 m->openOutputFile(outputFileName, out);
523
524                 ifstream in;
525                 m->openInputFile(groupfile, in);
526                 string name, group;
527                 
528                 bool wroteSomething = false;
529                 
530                 while(!in.eof()){
531                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
532                         
533                         in >> name;                             //read from first column
534                         in >> group;                    //read from second column
535                         
536                         //if this name is in the accnos file
537                         if (names.count(name) == 0) {
538                                 wroteSomething = true;
539                                 out << name << '\t' << group << endl;
540                         }
541                                         
542                         m->gobble(in);
543                 }
544                 in.close();
545                 out.close();
546                 
547                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
548                 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
549                 
550                 return 0;
551         }
552         catch(exception& e) {
553                 m->errorOut(e, "RemoveLineageCommand", "readGroup");
554                 exit(1);
555         }
556 }
557 //**********************************************************************************************************************
558 int RemoveLineageCommand::readTax(){
559         try {
560                 string thisOutputDir = outputDir;
561                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
562                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
563                 ofstream out;
564                 m->openOutputFile(outputFileName, out);
565                 
566                 ifstream in;
567                 m->openInputFile(taxfile, in);
568                 string name, tax;
569                 
570                 bool wroteSomething = false;
571                 
572                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
573                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
574                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
575                 
576                 for (int i = 0; i < listOfTaxons.size(); i++) {
577                         noConfidenceTaxons[i] = listOfTaxons[i];
578                         int hasConPos = listOfTaxons[i].find_first_of('(');
579                         if (hasConPos != string::npos) {  
580                                 taxonsHasConfidence[i] = true; 
581                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
582                                 noConfidenceTaxons[i] = listOfTaxons[i];
583                                 m->removeConfidences(noConfidenceTaxons[i]);
584                         }
585                 }
586                 
587                 
588                 while(!in.eof()){
589
590                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
591
592                         in >> name;                             //read from first column
593                         in >> tax;                      //read from second column
594                         
595                         bool remove = false;
596                         
597                         for (int j = 0; j < listOfTaxons.size(); j++) {
598                                 string newtax = tax;
599                                 
600                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
601                                 if (!taxonsHasConfidence[j]) {
602                                         
603                                         int hasConfidences = tax.find_first_of('(');
604                                         if (hasConfidences != string::npos) { 
605                                                 newtax = tax;
606                                                 m->removeConfidences(newtax);
607                                         }
608                                         
609                                         int pos = newtax.find(noConfidenceTaxons[j]);
610                                         
611                                         if (pos == string::npos) { 
612                                                 //wroteSomething = true;
613                                                 //out << name << '\t' << tax << endl;
614                                         }else{ //this sequence contains the taxon the user wants to remove
615                                                 names.insert(name);
616                                                 remove=true; break;
617                                         }
618                                         
619                                 }else{//if taxons has them and you don't them remove taxons
620                                         int hasConfidences = tax.find_first_of('(');
621                                         if (hasConfidences == string::npos) { 
622                                                 
623                                                 int pos = newtax.find(noConfidenceTaxons[j]);
624                                                 
625                                                 if (pos == string::npos) { 
626                                                         //wroteSomething = true;
627                                                         //out << name << '\t' << tax << endl;
628                                                 }else{ //this sequence contains the taxon the user wants to remove
629                                                         names.insert(name);
630                                                         remove=true; break;
631                                                 }
632                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
633                                                 //first remove confidences from both and see if the taxonomy exists
634                                                 
635                                                 string noNewTax = tax;
636                                                 int hasConfidences = tax.find_first_of('(');
637                                                 if (hasConfidences != string::npos) { 
638                                                         noNewTax = tax;
639                                                         m->removeConfidences(noNewTax);
640                                                 }
641                                                 
642                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
643                                                 
644                                                 if (pos != string::npos) { //if yes, then are the confidences okay
645                                                         
646                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
647                                                         
648                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
649                                                         //we want to "line them up", so we will find the the index where the searchstring starts
650                                                         int index = 0;
651                                                         for (int i = 0; i < usersTaxon.size(); i++) {
652                                                                 
653                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
654                                                                         index = i;  
655                                                                         int spot = 0;
656                                                                         bool goodspot = true;
657                                                                         //is this really the start, or are we dealing with a taxon of the same name?
658                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
659                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
660                                                                                 else { spot++; }
661                                                                         }
662                                                                         
663                                                                         if (goodspot) { break; }
664                                                                 }
665                                                         }
666                                                         
667                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
668                                                                 
669                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
670                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
671                                                                                 remove = true;
672                                                                                 break;
673                                                                         }
674                                                                 }else {
675                                                                         remove = true;
676                                                                         break;
677                                                                 }
678                                                         }
679                                                         
680                                                         //passed the test so remove you
681                                                         if (remove) {
682                                                                 names.insert(name);
683                                                                 remove=true; break;
684                                                         }else {
685                                                                 //wroteSomething = true;
686                                                                 //out << name << '\t' << tax << endl;
687                                                         }
688                                                 }else {
689                                                         //wroteSomething = true;
690                                                         //out << name << '\t' << tax << endl;
691                                                 }
692                                         }
693                                 }
694                                 
695                         }
696                         
697                         if (!remove) {  wroteSomething = true; out << name << '\t' << tax << endl; }
698                         m->gobble(in);
699                 }
700                 in.close();
701                 out.close();
702                 
703                 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
704                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
705                         
706                 return 0;
707
708         }
709         catch(exception& e) {
710                 m->errorOut(e, "RemoveLineageCommand", "readTax");
711                 exit(1);
712         }
713 }
714 /**************************************************************************************************/
715 vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
716         try {
717                 
718                 vector< map<string, float> > t;
719                 string taxon = "";
720                 int taxLength = tax.length();
721                 for(int i=0;i<taxLength;i++){
722                         if(tax[i] == ';'){
723                                 
724                                 int openParen = taxon.find_first_of('(');
725                                 int closeParen = taxon.find_last_of(')');
726                                 
727                                 string newtaxon, confidence;
728                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
729                                         newtaxon = taxon.substr(0, openParen); //rip off confidence
730                                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
731                                 }else{
732                                         newtaxon = taxon;
733                                         confidence = "0";
734                                 }
735                                 float con = 0;
736                                 convert(confidence, con);
737                                 
738                                 map<string, float> temp;
739                                 temp[newtaxon] = con;
740                                 t.push_back(temp);
741                                 
742                                 taxon = "";
743                         }
744                         else{
745                                 taxon += tax[i];
746                         }
747                 }
748                 
749                 return t;
750         }
751         catch(exception& e) {
752                 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
753                 exit(1);
754         }
755 }
756 //**********************************************************************************************************************
757 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
758 int RemoveLineageCommand::readAlign(){
759         try {
760                 string thisOutputDir = outputDir;
761                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
762                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
763                 
764                 ofstream out;
765                 m->openOutputFile(outputFileName, out);
766
767                 ifstream in;
768                 m->openInputFile(alignfile, in);
769                 string name, junk;
770                 
771                 bool wroteSomething = false;
772                 
773                 //read column headers
774                 for (int i = 0; i < 16; i++) {  
775                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
776                         else                    {       break;                  }
777                 }
778                 out << endl;
779                 
780                 while(!in.eof()){
781                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
782                         
783                         in >> name;                             //read from first column
784                         
785                         //if this name is in the accnos file
786                         if (names.count(name) == 0) {
787                                 wroteSomething = true;
788                                 
789                                 out << name << '\t';
790                                 
791                                 //read rest
792                                 for (int i = 0; i < 15; i++) {  
793                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
794                                         else                    {       break;                  }
795                                 }
796                                 out << endl;
797                                 
798                         }else {//still read just don't do anything with it
799                                 
800                                 //read rest
801                                 for (int i = 0; i < 15; i++) {  
802                                         if (!in.eof())  {       in >> junk;             }
803                                         else                    {       break;                  }
804                                 }
805                         }
806                         
807                         m->gobble(in);
808                 }
809                 in.close();
810                 out.close();
811                 
812                 if (wroteSomething == false) {  m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
813                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
814                 
815                 return 0;
816                 
817         }
818         catch(exception& e) {
819                 m->errorOut(e, "RemoveLineageCommand", "readAlign");
820                 exit(1);
821         }
822 }
823 //**********************************************************************************************************************
824