]> git.donarmstrong.com Git - mothur.git/blob - removelineagecommand.cpp
added count file to trim.seqs, get.groups, get.lineage, get.seqs, heatmap.sim, list...
[mothur.git] / removelineagecommand.cpp
1 /*
2  *  removelineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> RemoveLineageCommand::setParameters(){   
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
18         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
19         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
21                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
22                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
23                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
24                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "",false,true); parameters.push_back(ptaxon);
25                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
26                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28                 
29                 vector<string> myArray;
30                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
31                 return myArray;
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "RemoveLineageCommand", "setParameters");
35                 exit(1);
36         }
37 }
38 //**********************************************************************************************************************
39 string RemoveLineageCommand::getHelpString(){   
40         try {
41                 string helpString = "";
42                 helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
43                 helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
44                 helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, count, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
45                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
46                 helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
47                 helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
48                 helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n";
49                 helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
50                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
51                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
52                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "RemoveLineageCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string RemoveLineageCommand::getOutputFileNameTag(string type, string inputName=""){    
63         try {
64         string outputFileName = "";
65                 map<string, vector<string> >::iterator it;
66         
67         //is this a type this command creates
68         it = outputTypes.find(type);
69         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
70         else {
71             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
72             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
73             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
74             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
75             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
76             else if (type == "count")       {   outputFileName =  "pick.count.table";   }
77             else if (type == "alignreport")      {   outputFileName =  "pick.align.report";   }
78             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
79         }
80         return outputFileName;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "RemoveLineageCommand", "getOutputFileNameTag");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 RemoveLineageCommand::RemoveLineageCommand(){   
89         try {
90                 abort = true; calledHelp = true; 
91                 setParameters();
92                 vector<string> tempOutNames;
93                 outputTypes["fasta"] = tempOutNames;
94                 outputTypes["taxonomy"] = tempOutNames;
95                 outputTypes["name"] = tempOutNames;
96                 outputTypes["group"] = tempOutNames;
97                 outputTypes["alignreport"] = tempOutNames;
98                 outputTypes["list"] = tempOutNames;
99         outputTypes["count"] = tempOutNames;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 RemoveLineageCommand::RemoveLineageCommand(string option)  {
108         try {
109                 abort = false; calledHelp = false;   
110                                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
114                 
115                 else {
116                         vector<string> myArray = setParameters();       
117                         
118                         OptionParser parser(option);
119                         map<string,string> parameters = parser.getParameters();
120                         
121                         ValidParameters validParameter;
122                         map<string,string>::iterator it;
123                         
124                         //check to make sure all parameters are valid for command
125                         for (it = parameters.begin(); it != parameters.end(); it++) { 
126                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
127                         }
128                         
129                         //initialize outputTypes
130                         vector<string> tempOutNames;
131                         outputTypes["fasta"] = tempOutNames;
132                         outputTypes["taxonomy"] = tempOutNames;
133                         outputTypes["name"] = tempOutNames;
134                         outputTypes["group"] = tempOutNames;
135                         outputTypes["alignreport"] = tempOutNames;
136                         outputTypes["list"] = tempOutNames;
137             outputTypes["count"] = tempOutNames;
138                         
139                         //if the user changes the output directory command factory will send this info to us in the output parameter 
140                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
141                         
142                         //if the user changes the input directory command factory will send this info to us in the output parameter 
143                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
144                         if (inputDir == "not found"){   inputDir = "";          }
145                         else {
146                                 string path;
147                                 it = parameters.find("alignreport");
148                                 //user has given a template file
149                                 if(it != parameters.end()){ 
150                                         path = m->hasPath(it->second);
151                                         //if the user has not given a path then, add inputdir. else leave path alone.
152                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
153                                 }
154                                 
155                                 it = parameters.find("fasta");
156                                 //user has given a template file
157                                 if(it != parameters.end()){ 
158                                         path = m->hasPath(it->second);
159                                         //if the user has not given a path then, add inputdir. else leave path alone.
160                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
161                                 }
162                                 
163                                 it = parameters.find("list");
164                                 //user has given a template file
165                                 if(it != parameters.end()){ 
166                                         path = m->hasPath(it->second);
167                                         //if the user has not given a path then, add inputdir. else leave path alone.
168                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
169                                 }
170                                 
171                                 it = parameters.find("name");
172                                 //user has given a template file
173                                 if(it != parameters.end()){ 
174                                         path = m->hasPath(it->second);
175                                         //if the user has not given a path then, add inputdir. else leave path alone.
176                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
177                                 }
178                                 
179                                 it = parameters.find("group");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
185                                 }
186                                 
187                                 it = parameters.find("taxonomy");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
193                                 }
194                 
195                 it = parameters.find("count");
196                                 //user has given a template file
197                                 if(it != parameters.end()){ 
198                                         path = m->hasPath(it->second);
199                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
201                                 }
202                         }
203
204                         
205                         //check for required parameters                 
206                         fastafile = validParameter.validFile(parameters, "fasta", true);
207                         if (fastafile == "not open") { fastafile = ""; abort = true; }
208                         else if (fastafile == "not found") {  fastafile = "";  }        
209                         else { m->setFastaFile(fastafile); }
210                         
211                         namefile = validParameter.validFile(parameters, "name", true);
212                         if (namefile == "not open") { namefile = ""; abort = true; }
213                         else if (namefile == "not found") {  namefile = "";  }  
214                         else { m->setNameFile(namefile); }
215                         
216                         groupfile = validParameter.validFile(parameters, "group", true);
217                         if (groupfile == "not open") { abort = true; }
218                         else if (groupfile == "not found") {  groupfile = "";  }        
219                         else { m->setGroupFile(groupfile); }
220                         
221                         alignfile = validParameter.validFile(parameters, "alignreport", true);
222                         if (alignfile == "not open") { abort = true; }
223                         else if (alignfile == "not found") {  alignfile = "";  }
224                         
225                         listfile = validParameter.validFile(parameters, "list", true);
226                         if (listfile == "not open") { abort = true; }
227                         else if (listfile == "not found") {  listfile = "";  }
228                         else { m->setListFile(listfile); }
229                         
230                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
231                         if (taxfile == "not open") { taxfile = ""; abort = true; }
232                         else if (taxfile == "not found") {              
233                                 taxfile = m->getTaxonomyFile(); 
234                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
235                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
236                         }else { m->setTaxonomyFile(taxfile); }
237                         
238             countfile = validParameter.validFile(parameters, "count", true);
239             if (countfile == "not open") { countfile = ""; abort = true; }
240             else if (countfile == "not found") { countfile = "";  }     
241             else { m->setCountTableFile(countfile); }
242             
243             if ((namefile != "") && (countfile != "")) {
244                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
245             }
246             
247             if ((groupfile != "") && (countfile != "")) {
248                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
249             }
250             
251                         string usedDups = "true";
252                         string temp = validParameter.validFile(parameters, "dups", false);      
253                         if (temp == "not found") { 
254                                 if (namefile != "") {  temp = "true";                                   }
255                                 else                            {  temp = "false"; usedDups = "";       }
256                         }
257                         dups = m->isTrue(temp);
258                         
259                         taxons = validParameter.validFile(parameters, "taxon", false);  
260                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
261                         else { 
262                                 //rip off quotes
263                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
264                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
265                         }
266                         m->splitAtChar(taxons, listOfTaxons, '-');
267                         
268                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
269                 
270                         if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
271                         
272                         if (countfile == "") {
273                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
274                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
275                     parser.getNameFile(files);
276                 }
277             }
278                         
279                 }
280
281         }
282         catch(exception& e) {
283                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
284                 exit(1);
285         }
286 }
287 //**********************************************************************************************************************
288
289 int RemoveLineageCommand::execute(){
290         try {
291                 
292                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
293                 
294                 if (m->control_pressed) { return 0; }
295         
296         if (countfile != "") {
297             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
298                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
299             }
300         }
301                 
302                 //read through the correct file and output lines you want to keep
303                 if (taxfile != "")                      {               readTax();              }  //fills the set of names to remove
304                 if (namefile != "")                     {               readName();             }
305                 if (fastafile != "")            {               readFasta();    }
306                 if (groupfile != "")            {               readGroup();    }
307                 if (alignfile != "")            {               readAlign();    }
308                 if (listfile != "")                     {               readList();             }
309         if (countfile != "")            {               readCount();    }
310                 
311                 
312                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
313                 
314                 if (outputNames.size() != 0) {
315                         m->mothurOutEndLine();
316                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
317                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
318                         m->mothurOutEndLine();
319                         
320                         //set fasta file as new current fastafile
321                         string current = "";
322                         itTypes = outputTypes.find("fasta");
323                         if (itTypes != outputTypes.end()) {
324                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
325                         }
326                         
327                         itTypes = outputTypes.find("name");
328                         if (itTypes != outputTypes.end()) {
329                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
330                         }
331                         
332                         itTypes = outputTypes.find("group");
333                         if (itTypes != outputTypes.end()) {
334                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
335                         }
336                         
337                         itTypes = outputTypes.find("list");
338                         if (itTypes != outputTypes.end()) {
339                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
340                         }
341                         
342                         itTypes = outputTypes.find("taxonomy");
343                         if (itTypes != outputTypes.end()) {
344                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
345                         }
346             
347             itTypes = outputTypes.find("count");
348                         if (itTypes != outputTypes.end()) {
349                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
350                         }
351                 }
352                 
353                 return 0;               
354         }
355
356         catch(exception& e) {
357                 m->errorOut(e, "RemoveLineageCommand", "execute");
358                 exit(1);
359         }
360 }
361
362 //**********************************************************************************************************************
363 int RemoveLineageCommand::readFasta(){
364         try {
365                 string thisOutputDir = outputDir;
366                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
367                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);         
368                 ofstream out;
369                 m->openOutputFile(outputFileName, out);
370                 
371                 ifstream in;
372                 m->openInputFile(fastafile, in);
373                 string name;
374                 
375                 bool wroteSomething = false;
376                 
377                 while(!in.eof()){
378                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
379                         
380                         Sequence currSeq(in);
381                         name = currSeq.getName();
382                         
383                         if (name != "") {
384                                 //if this name is in the accnos file
385                                 if (names.count(name) == 0) {
386                                         wroteSomething = true;
387                                         
388                                         currSeq.printSequence(out);
389                                 }
390                         }
391                         m->gobble(in);
392                 }
393                 in.close();     
394                 out.close();
395                 
396                 if (wroteSomething == false) {  m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
397                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); 
398                 
399                 return 0;
400                 
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "RemoveLineageCommand", "readFasta");
404                 exit(1);
405         }
406 }
407 //**********************************************************************************************************************
408 int RemoveLineageCommand::readList(){
409         try {
410                 string thisOutputDir = outputDir;
411                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
412                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);            
413                 ofstream out;
414                 m->openOutputFile(outputFileName, out);
415                 
416                 ifstream in;
417                 m->openInputFile(listfile, in);
418                 
419                 bool wroteSomething = false;
420                 
421                 while(!in.eof()){
422                         //read in list vector
423                         ListVector list(in);
424                         
425                         //make a new list vector
426                         ListVector newList;
427                         newList.setLabel(list.getLabel());
428                         
429                         //for each bin
430                         for (int i = 0; i < list.getNumBins(); i++) {
431                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
432                         
433                                 //parse out names that are in accnos file
434                                 string binnames = list.get(i);
435                                 
436                                 string newNames = "";
437                                 while (binnames.find_first_of(',') != -1) { 
438                                         string name = binnames.substr(0,binnames.find_first_of(','));
439                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
440                                         
441                                         //if that name is in the .accnos file, add it
442                                         if (names.count(name) == 0) {  newNames += name + ",";  }
443                                 }
444                         
445                                 //get last name
446                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
447
448                                 //if there are names in this bin add to new list
449                                 if (newNames != "") {  
450                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
451                                         newList.push_back(newNames);    
452                                 }
453                         }
454                                 
455                         //print new listvector
456                         if (newList.getNumBins() != 0) {
457                                 wroteSomething = true;
458                                 newList.print(out);
459                         }
460                         
461                         m->gobble(in);
462                 }
463                 in.close();     
464                 out.close();
465                 
466                 if (wroteSomething == false) {  m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
467                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); 
468                                 
469                 return 0;
470
471         }
472         catch(exception& e) {
473                 m->errorOut(e, "RemoveLineageCommand", "readList");
474                 exit(1);
475         }
476 }
477 //**********************************************************************************************************************
478 int RemoveLineageCommand::readName(){
479         try {
480                 string thisOutputDir = outputDir;
481                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
482                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
483                 ofstream out;
484                 m->openOutputFile(outputFileName, out);
485
486                 ifstream in;
487                 m->openInputFile(namefile, in);
488                 string name, firstCol, secondCol;
489                 
490                 bool wroteSomething = false;
491                 
492                 while(!in.eof()){
493                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
494
495                         in >> firstCol;                         
496                         in >> secondCol;                        
497
498                         vector<string> parsedNames;
499                         m->splitAtComma(secondCol, parsedNames);
500                         
501                         vector<string> validSecond;  validSecond.clear();
502                         for (int i = 0; i < parsedNames.size(); i++) {
503                                 if (names.count(parsedNames[i]) == 0) {
504                                         validSecond.push_back(parsedNames[i]);
505                                 }
506                         }
507                         
508                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
509                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
510                         }else {
511                                         //if the name in the first column is in the set then print it and any other names in second column also in set
512                                 if (names.count(firstCol) == 0) {
513                                         
514                                         wroteSomething = true;
515                                         
516                                         out << firstCol << '\t';
517                                         
518                                         //you know you have at least one valid second since first column is valid
519                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
520                                         out << validSecond[validSecond.size()-1] << endl;
521                                         
522                                         //make first name in set you come to first column and then add the remaining names to second column
523                                 }else {
524                                         
525                                         //you want part of this row
526                                         if (validSecond.size() != 0) {
527                                                 
528                                                 wroteSomething = true;
529                                                 
530                                                 out << validSecond[0] << '\t';
531                                                 
532                                                 //you know you have at least one valid second since first column is valid
533                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
534                                                 out << validSecond[validSecond.size()-1] << endl;
535                                         }
536                                 }
537                         }
538                         m->gobble(in);
539                 }
540                 in.close();
541                 out.close();
542
543                 if (wroteSomething == false) {  m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
544                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
545                                 
546                 return 0;
547         }
548         catch(exception& e) {
549                 m->errorOut(e, "RemoveLineageCommand", "readName");
550                 exit(1);
551         }
552 }
553 //**********************************************************************************************************************
554 int RemoveLineageCommand::readCount(){
555         try {
556                 string thisOutputDir = outputDir;
557                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
558                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
559                 
560                 ofstream out;
561                 m->openOutputFile(outputFileName, out);
562                 
563                 ifstream in;
564                 m->openInputFile(countfile, in);
565                 
566                 bool wroteSomething = false;
567                 
568         string headers = m->getline(in); m->gobble(in);
569         out << headers << endl;
570         
571         string name, rest; int thisTotal;
572         while (!in.eof()) {
573             
574             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
575             
576             in >> name; m->gobble(in); 
577             in >> thisTotal; m->gobble(in);
578             rest = m->getline(in); m->gobble(in);
579             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
580             
581             if (names.count(name) == 0) {
582                 out << name << '\t' << thisTotal << '\t' << rest << endl;
583                 wroteSomething = true;
584             }
585         }
586         in.close();
587                 out.close();
588                 
589                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
590                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
591         
592                 return 0;
593         }
594         catch(exception& e) {
595                 m->errorOut(e, "RemoveLineageCommand", "readCount");
596                 exit(1);
597         }
598 }
599 //**********************************************************************************************************************
600 int RemoveLineageCommand::readGroup(){
601         try {
602                 string thisOutputDir = outputDir;
603                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
604                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);         
605                 ofstream out;
606                 m->openOutputFile(outputFileName, out);
607
608                 ifstream in;
609                 m->openInputFile(groupfile, in);
610                 string name, group;
611                 
612                 bool wroteSomething = false;
613                 
614                 while(!in.eof()){
615                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
616                         
617                         in >> name;                             //read from first column
618                         in >> group;                    //read from second column
619                         
620                         //if this name is in the accnos file
621                         if (names.count(name) == 0) {
622                                 wroteSomething = true;
623                                 out << name << '\t' << group << endl;
624                         }
625                                         
626                         m->gobble(in);
627                 }
628                 in.close();
629                 out.close();
630                 
631                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
632                 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
633                 
634                 return 0;
635         }
636         catch(exception& e) {
637                 m->errorOut(e, "RemoveLineageCommand", "readGroup");
638                 exit(1);
639         }
640 }
641 //**********************************************************************************************************************
642 int RemoveLineageCommand::readTax(){
643         try {
644                 string thisOutputDir = outputDir;
645                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
646                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
647                 ofstream out;
648                 m->openOutputFile(outputFileName, out);
649                 
650                 ifstream in;
651                 m->openInputFile(taxfile, in);
652                 string name, tax;
653                 
654                 bool wroteSomething = false;
655                 
656                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
657                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
658                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
659                 
660                 for (int i = 0; i < listOfTaxons.size(); i++) {
661                         noConfidenceTaxons[i] = listOfTaxons[i];
662                         int hasConPos = listOfTaxons[i].find_first_of('(');
663                         if (hasConPos != string::npos) {  
664                                 taxonsHasConfidence[i] = true; 
665                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
666                                 noConfidenceTaxons[i] = listOfTaxons[i];
667                                 m->removeConfidences(noConfidenceTaxons[i]);
668                         }
669                 }
670                 
671                 
672                 while(!in.eof()){
673
674                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
675
676                         in >> name;                             //read from first column
677                         in >> tax;                      //read from second column
678                         
679                         bool remove = false;
680                         
681                         for (int j = 0; j < listOfTaxons.size(); j++) {
682                                 string newtax = tax;
683                                 
684                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
685                                 if (!taxonsHasConfidence[j]) {
686                                         
687                                         int hasConfidences = tax.find_first_of('(');
688                                         if (hasConfidences != string::npos) { 
689                                                 newtax = tax;
690                                                 m->removeConfidences(newtax);
691                                         }
692                                         
693                                         int pos = newtax.find(noConfidenceTaxons[j]);
694                                         
695                                         if (pos == string::npos) { 
696                                                 //wroteSomething = true;
697                                                 //out << name << '\t' << tax << endl;
698                                         }else{ //this sequence contains the taxon the user wants to remove
699                                                 names.insert(name);
700                                                 remove=true; break;
701                                         }
702                                         
703                                 }else{//if taxons has them and you don't them remove taxons
704                                         int hasConfidences = tax.find_first_of('(');
705                                         if (hasConfidences == string::npos) { 
706                                                 
707                                                 int pos = newtax.find(noConfidenceTaxons[j]);
708                                                 
709                                                 if (pos == string::npos) { 
710                                                         //wroteSomething = true;
711                                                         //out << name << '\t' << tax << endl;
712                                                 }else{ //this sequence contains the taxon the user wants to remove
713                                                         names.insert(name);
714                                                         remove=true; break;
715                                                 }
716                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
717                                                 //first remove confidences from both and see if the taxonomy exists
718                                                 
719                                                 string noNewTax = tax;
720                                                 int hasConfidences = tax.find_first_of('(');
721                                                 if (hasConfidences != string::npos) { 
722                                                         noNewTax = tax;
723                                                         m->removeConfidences(noNewTax);
724                                                 }
725                                                 
726                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
727                                                 
728                                                 if (pos != string::npos) { //if yes, then are the confidences okay
729                                                         
730                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
731                                                         
732                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
733                                                         //we want to "line them up", so we will find the the index where the searchstring starts
734                                                         int index = 0;
735                                                         for (int i = 0; i < usersTaxon.size(); i++) {
736                                                                 
737                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
738                                                                         index = i;  
739                                                                         int spot = 0;
740                                                                         bool goodspot = true;
741                                                                         //is this really the start, or are we dealing with a taxon of the same name?
742                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
743                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
744                                                                                 else { spot++; }
745                                                                         }
746                                                                         
747                                                                         if (goodspot) { break; }
748                                                                 }
749                                                         }
750                                                         
751                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
752                                                                 
753                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
754                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
755                                                                                 remove = true;
756                                                                                 break;
757                                                                         }
758                                                                 }else {
759                                                                         remove = true;
760                                                                         break;
761                                                                 }
762                                                         }
763                                                         
764                                                         //passed the test so remove you
765                                                         if (remove) {
766                                                                 names.insert(name);
767                                                                 remove=true; break;
768                                                         }else {
769                                                                 //wroteSomething = true;
770                                                                 //out << name << '\t' << tax << endl;
771                                                         }
772                                                 }else {
773                                                         //wroteSomething = true;
774                                                         //out << name << '\t' << tax << endl;
775                                                 }
776                                         }
777                                 }
778                                 
779                         }
780                         
781                         if (!remove) {  wroteSomething = true; out << name << '\t' << tax << endl; }
782                         m->gobble(in);
783                 }
784                 in.close();
785                 out.close();
786                 
787                 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
788                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
789                         
790                 return 0;
791
792         }
793         catch(exception& e) {
794                 m->errorOut(e, "RemoveLineageCommand", "readTax");
795                 exit(1);
796         }
797 }
798 /**************************************************************************************************/
799 vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
800         try {
801                 
802                 vector< map<string, float> > t;
803                 string taxon = "";
804                 int taxLength = tax.length();
805                 for(int i=0;i<taxLength;i++){
806                         if(tax[i] == ';'){
807                                 
808                                 int openParen = taxon.find_first_of('(');
809                                 int closeParen = taxon.find_last_of(')');
810                                 
811                                 string newtaxon, confidence;
812                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
813                                         newtaxon = taxon.substr(0, openParen); //rip off confidence
814                                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
815                                 }else{
816                                         newtaxon = taxon;
817                                         confidence = "0";
818                                 }
819                                 float con = 0;
820                                 convert(confidence, con);
821                                 
822                                 map<string, float> temp;
823                                 temp[newtaxon] = con;
824                                 t.push_back(temp);
825                                 
826                                 taxon = "";
827                         }
828                         else{
829                                 taxon += tax[i];
830                         }
831                 }
832                 
833                 return t;
834         }
835         catch(exception& e) {
836                 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
837                 exit(1);
838         }
839 }
840 //**********************************************************************************************************************
841 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
842 int RemoveLineageCommand::readAlign(){
843         try {
844                 string thisOutputDir = outputDir;
845                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
846                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
847                 
848                 ofstream out;
849                 m->openOutputFile(outputFileName, out);
850
851                 ifstream in;
852                 m->openInputFile(alignfile, in);
853                 string name, junk;
854                 
855                 bool wroteSomething = false;
856                 
857                 //read column headers
858                 for (int i = 0; i < 16; i++) {  
859                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
860                         else                    {       break;                  }
861                 }
862                 out << endl;
863                 
864                 while(!in.eof()){
865                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
866                         
867                         in >> name;                             //read from first column
868                         
869                         //if this name is in the accnos file
870                         if (names.count(name) == 0) {
871                                 wroteSomething = true;
872                                 
873                                 out << name << '\t';
874                                 
875                                 //read rest
876                                 for (int i = 0; i < 15; i++) {  
877                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
878                                         else                    {       break;                  }
879                                 }
880                                 out << endl;
881                                 
882                         }else {//still read just don't do anything with it
883                                 
884                                 //read rest
885                                 for (int i = 0; i < 15; i++) {  
886                                         if (!in.eof())  {       in >> junk;             }
887                                         else                    {       break;                  }
888                                 }
889                         }
890                         
891                         m->gobble(in);
892                 }
893                 in.close();
894                 out.close();
895                 
896                 if (wroteSomething == false) {  m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
897                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
898                 
899                 return 0;
900                 
901         }
902         catch(exception& e) {
903                 m->errorOut(e, "RemoveLineageCommand", "readAlign");
904                 exit(1);
905         }
906 }
907 //**********************************************************************************************************************
908