]> git.donarmstrong.com Git - mothur.git/blob - removelineagecommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / removelineagecommand.cpp
1 /*
2  *  removelineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> RemoveLineageCommand::setParameters(){   
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
22                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
23                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,true,true); parameters.push_back(ptaxonomy);
24                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
25                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true,true); parameters.push_back(ptaxon);
26                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "RemoveLineageCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string RemoveLineageCommand::getHelpString(){   
41         try {
42                 string helpString = "";
43                 helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
44                 helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
45                 helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, count, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
46                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47                 helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
48                 helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
49                 helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n";
50                 helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
51                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
52                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
53                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
54                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
55                 return helpString;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "RemoveLineageCommand", "getHelpString");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63 string RemoveLineageCommand::getOutputPattern(string type) {
64     try {
65         string pattern = "";
66         
67         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
68         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
69         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
70         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
71         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
72         else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
73         else if (type == "alignreport")      {   pattern = "[filename],pick.align.report";    }
74         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
75         
76         return pattern;
77     }
78     catch(exception& e) {
79         m->errorOut(e, "RemoveLineageCommand", "getOutputPattern");
80         exit(1);
81     }
82 }
83
84 //**********************************************************************************************************************
85 RemoveLineageCommand::RemoveLineageCommand(){   
86         try {
87                 abort = true; calledHelp = true; 
88                 setParameters();
89                 vector<string> tempOutNames;
90                 outputTypes["fasta"] = tempOutNames;
91                 outputTypes["taxonomy"] = tempOutNames;
92                 outputTypes["name"] = tempOutNames;
93                 outputTypes["group"] = tempOutNames;
94                 outputTypes["alignreport"] = tempOutNames;
95                 outputTypes["list"] = tempOutNames;
96         outputTypes["count"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 RemoveLineageCommand::RemoveLineageCommand(string option)  {
105         try {
106                 abort = false; calledHelp = false;   
107                                 
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();       
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         
118                         ValidParameters validParameter;
119                         map<string,string>::iterator it;
120                         
121                         //check to make sure all parameters are valid for command
122                         for (it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["fasta"] = tempOutNames;
129                         outputTypes["taxonomy"] = tempOutNames;
130                         outputTypes["name"] = tempOutNames;
131                         outputTypes["group"] = tempOutNames;
132                         outputTypes["alignreport"] = tempOutNames;
133                         outputTypes["list"] = tempOutNames;
134             outputTypes["count"] = tempOutNames;
135                         
136                         //if the user changes the output directory command factory will send this info to us in the output parameter 
137                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
138                         
139                         //if the user changes the input directory command factory will send this info to us in the output parameter 
140                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
141                         if (inputDir == "not found"){   inputDir = "";          }
142                         else {
143                                 string path;
144                                 it = parameters.find("alignreport");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
150                                 }
151                                 
152                                 it = parameters.find("fasta");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
158                                 }
159                                 
160                                 it = parameters.find("list");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
166                                 }
167                                 
168                                 it = parameters.find("name");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
174                                 }
175                                 
176                                 it = parameters.find("group");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
182                                 }
183                                 
184                                 it = parameters.find("taxonomy");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
190                                 }
191                 
192                 it = parameters.find("count");
193                                 //user has given a template file
194                                 if(it != parameters.end()){ 
195                                         path = m->hasPath(it->second);
196                                         //if the user has not given a path then, add inputdir. else leave path alone.
197                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
198                                 }
199                         }
200
201                         
202                         //check for required parameters                 
203                         fastafile = validParameter.validFile(parameters, "fasta", true);
204                         if (fastafile == "not open") { fastafile = ""; abort = true; }
205                         else if (fastafile == "not found") {  fastafile = "";  }        
206                         else { m->setFastaFile(fastafile); }
207                         
208                         namefile = validParameter.validFile(parameters, "name", true);
209                         if (namefile == "not open") { namefile = ""; abort = true; }
210                         else if (namefile == "not found") {  namefile = "";  }  
211                         else { m->setNameFile(namefile); }
212                         
213                         groupfile = validParameter.validFile(parameters, "group", true);
214                         if (groupfile == "not open") { abort = true; }
215                         else if (groupfile == "not found") {  groupfile = "";  }        
216                         else { m->setGroupFile(groupfile); }
217                         
218                         alignfile = validParameter.validFile(parameters, "alignreport", true);
219                         if (alignfile == "not open") { abort = true; }
220                         else if (alignfile == "not found") {  alignfile = "";  }
221                         
222                         listfile = validParameter.validFile(parameters, "list", true);
223                         if (listfile == "not open") { abort = true; }
224                         else if (listfile == "not found") {  listfile = "";  }
225                         else { m->setListFile(listfile); }
226                         
227                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
228                         if (taxfile == "not open") { taxfile = ""; abort = true; }
229                         else if (taxfile == "not found") {              
230                                 taxfile = m->getTaxonomyFile(); 
231                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
232                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
233                         }else { m->setTaxonomyFile(taxfile); }
234                         
235             countfile = validParameter.validFile(parameters, "count", true);
236             if (countfile == "not open") { countfile = ""; abort = true; }
237             else if (countfile == "not found") { countfile = "";  }     
238             else { m->setCountTableFile(countfile); }
239             
240             if ((namefile != "") && (countfile != "")) {
241                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
242             }
243             
244             if ((groupfile != "") && (countfile != "")) {
245                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
246             }
247             
248                         string usedDups = "true";
249                         string temp = validParameter.validFile(parameters, "dups", false);      
250                         if (temp == "not found") { 
251                                 if (namefile != "") {  temp = "true";                                   }
252                                 else                            {  temp = "false"; usedDups = "";       }
253                         }
254                         dups = m->isTrue(temp);
255                         
256                         taxons = validParameter.validFile(parameters, "taxon", false);  
257                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
258                         else { 
259                                 //rip off quotes
260                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
261                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
262                         }
263                         m->splitAtChar(taxons, listOfTaxons, '-');
264                         
265                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
266                 
267                         if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
268                         
269                         if (countfile == "") {
270                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
271                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
272                     parser.getNameFile(files);
273                 }
274             }
275                         
276                 }
277
278         }
279         catch(exception& e) {
280                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
281                 exit(1);
282         }
283 }
284 //**********************************************************************************************************************
285
286 int RemoveLineageCommand::execute(){
287         try {
288                 
289                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
290                 
291                 if (m->control_pressed) { return 0; }
292         
293         if (countfile != "") {
294             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
295                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
296             }
297         }
298                 
299                 //read through the correct file and output lines you want to keep
300                 if (taxfile != "")                      {               readTax();              }  //fills the set of names to remove
301                 if (namefile != "")                     {               readName();             }
302                 if (fastafile != "")            {               readFasta();    }
303                 if (groupfile != "")            {               readGroup();    }
304                 if (alignfile != "")            {               readAlign();    }
305                 if (listfile != "")                     {               readList();             }
306         if (countfile != "")            {               readCount();    }
307                 
308                 
309                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
310                 
311                 if (outputNames.size() != 0) {
312                         m->mothurOutEndLine();
313                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
314                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
315                         m->mothurOutEndLine();
316                         
317                         //set fasta file as new current fastafile
318                         string current = "";
319                         itTypes = outputTypes.find("fasta");
320                         if (itTypes != outputTypes.end()) {
321                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
322                         }
323                         
324                         itTypes = outputTypes.find("name");
325                         if (itTypes != outputTypes.end()) {
326                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
327                         }
328                         
329                         itTypes = outputTypes.find("group");
330                         if (itTypes != outputTypes.end()) {
331                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
332                         }
333                         
334                         itTypes = outputTypes.find("list");
335                         if (itTypes != outputTypes.end()) {
336                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
337                         }
338                         
339                         itTypes = outputTypes.find("taxonomy");
340                         if (itTypes != outputTypes.end()) {
341                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
342                         }
343             
344             itTypes = outputTypes.find("count");
345                         if (itTypes != outputTypes.end()) {
346                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
347                         }
348                 }
349                 
350                 return 0;               
351         }
352
353         catch(exception& e) {
354                 m->errorOut(e, "RemoveLineageCommand", "execute");
355                 exit(1);
356         }
357 }
358
359 //**********************************************************************************************************************
360 int RemoveLineageCommand::readFasta(){
361         try {
362                 string thisOutputDir = outputDir;
363                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
364                 map<string, string> variables; 
365         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
366         variables["[extension]"] = m->getExtension(fastafile);
367                 string outputFileName = getOutputFileName("fasta", variables);  
368                 ofstream out;
369                 m->openOutputFile(outputFileName, out);
370                 
371                 ifstream in;
372                 m->openInputFile(fastafile, in);
373                 string name;
374                 
375                 bool wroteSomething = false;
376                 
377                 while(!in.eof()){
378                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
379                         
380                         Sequence currSeq(in);
381                         name = currSeq.getName();
382                         
383                         if (name != "") {
384                                 //if this name is in the accnos file
385                                 if (names.count(name) == 0) {
386                                         wroteSomething = true;
387                                         
388                                         currSeq.printSequence(out);
389                                 }
390                         }
391                         m->gobble(in);
392                 }
393                 in.close();     
394                 out.close();
395                 
396                 if (wroteSomething == false) {  m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
397                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); 
398                 
399                 return 0;
400                 
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "RemoveLineageCommand", "readFasta");
404                 exit(1);
405         }
406 }
407 //**********************************************************************************************************************
408 int RemoveLineageCommand::readList(){
409         try {
410                 string thisOutputDir = outputDir;
411                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
412                 map<string, string> variables; 
413         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
414         variables["[extension]"] = m->getExtension(listfile);
415                 string outputFileName = getOutputFileName("list", variables);   
416                 ofstream out;
417                 m->openOutputFile(outputFileName, out);
418                 
419                 ifstream in;
420                 m->openInputFile(listfile, in);
421                 
422                 bool wroteSomething = false;
423                 
424                 while(!in.eof()){
425                         //read in list vector
426                         ListVector list(in);
427                         
428                         //make a new list vector
429                         ListVector newList;
430                         newList.setLabel(list.getLabel());
431                         
432                         //for each bin
433                         for (int i = 0; i < list.getNumBins(); i++) {
434                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
435                         
436                                 //parse out names that are in accnos file
437                                 string binnames = list.get(i);
438                                 
439                                 string newNames = "";
440                                 while (binnames.find_first_of(',') != -1) { 
441                                         string name = binnames.substr(0,binnames.find_first_of(','));
442                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
443                                         
444                                         //if that name is in the .accnos file, add it
445                                         if (names.count(name) == 0) {  newNames += name + ",";  }
446                                 }
447                         
448                                 //get last name
449                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
450
451                                 //if there are names in this bin add to new list
452                                 if (newNames != "") {  
453                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
454                                         newList.push_back(newNames);    
455                                 }
456                         }
457                                 
458                         //print new listvector
459                         if (newList.getNumBins() != 0) {
460                                 wroteSomething = true;
461                                 newList.print(out);
462                         }
463                         
464                         m->gobble(in);
465                 }
466                 in.close();     
467                 out.close();
468                 
469                 if (wroteSomething == false) {  m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
470                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); 
471                                 
472                 return 0;
473
474         }
475         catch(exception& e) {
476                 m->errorOut(e, "RemoveLineageCommand", "readList");
477                 exit(1);
478         }
479 }
480 //**********************************************************************************************************************
481 int RemoveLineageCommand::readName(){
482         try {
483                 string thisOutputDir = outputDir;
484                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
485                 map<string, string> variables; 
486                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
487         variables["[extension]"] = m->getExtension(namefile);
488                 string outputFileName = getOutputFileName("name", variables);
489                 ofstream out;
490                 m->openOutputFile(outputFileName, out);
491
492                 ifstream in;
493                 m->openInputFile(namefile, in);
494                 string name, firstCol, secondCol;
495                 
496                 bool wroteSomething = false;
497                 
498                 while(!in.eof()){
499                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
500
501                         in >> firstCol;                         
502                         in >> secondCol;                        
503
504                         vector<string> parsedNames;
505                         m->splitAtComma(secondCol, parsedNames);
506                         
507                         vector<string> validSecond;  validSecond.clear();
508                         for (int i = 0; i < parsedNames.size(); i++) {
509                                 if (names.count(parsedNames[i]) == 0) {
510                                         validSecond.push_back(parsedNames[i]);
511                                 }
512                         }
513                         
514                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
515                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
516                         }else {
517                                         //if the name in the first column is in the set then print it and any other names in second column also in set
518                                 if (names.count(firstCol) == 0) {
519                                         
520                                         wroteSomething = true;
521                                         
522                                         out << firstCol << '\t';
523                                         
524                                         //you know you have at least one valid second since first column is valid
525                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
526                                         out << validSecond[validSecond.size()-1] << endl;
527                                         
528                                         //make first name in set you come to first column and then add the remaining names to second column
529                                 }else {
530                                         
531                                         //you want part of this row
532                                         if (validSecond.size() != 0) {
533                                                 
534                                                 wroteSomething = true;
535                                                 
536                                                 out << validSecond[0] << '\t';
537                                                 
538                                                 //you know you have at least one valid second since first column is valid
539                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
540                                                 out << validSecond[validSecond.size()-1] << endl;
541                                         }
542                                 }
543                         }
544                         m->gobble(in);
545                 }
546                 in.close();
547                 out.close();
548
549                 if (wroteSomething == false) {  m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
550                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
551                                 
552                 return 0;
553         }
554         catch(exception& e) {
555                 m->errorOut(e, "RemoveLineageCommand", "readName");
556                 exit(1);
557         }
558 }
559 //**********************************************************************************************************************
560 int RemoveLineageCommand::readCount(){
561         try {
562                 string thisOutputDir = outputDir;
563                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
564                 map<string, string> variables; 
565                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
566         variables["[extension]"] = m->getExtension(countfile);
567                 string outputFileName = getOutputFileName("count", variables);
568
569                 
570                 ofstream out;
571                 m->openOutputFile(outputFileName, out);
572                 
573                 ifstream in;
574                 m->openInputFile(countfile, in);
575                 
576                 bool wroteSomething = false;
577                 
578         string headers = m->getline(in); m->gobble(in);
579         out << headers << endl;
580         
581         string name, rest; int thisTotal;
582         while (!in.eof()) {
583             
584             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
585             
586             in >> name; m->gobble(in); 
587             in >> thisTotal; m->gobble(in);
588             rest = m->getline(in); m->gobble(in);
589             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
590             
591             if (names.count(name) == 0) {
592                 out << name << '\t' << thisTotal << '\t' << rest << endl;
593                 wroteSomething = true;
594             }
595         }
596         in.close();
597                 out.close();
598         
599         //check for groups that have been eliminated
600         CountTable ct;
601         if (ct.testGroups(outputFileName)) {
602             ct.readTable(outputFileName);
603             ct.printTable(outputFileName);
604         }
605                 
606                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
607                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
608         
609                 return 0;
610         }
611         catch(exception& e) {
612                 m->errorOut(e, "RemoveLineageCommand", "readCount");
613                 exit(1);
614         }
615 }
616 //**********************************************************************************************************************
617 int RemoveLineageCommand::readGroup(){
618         try {
619                 string thisOutputDir = outputDir;
620                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
621                 map<string, string> variables; 
622                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
623         variables["[extension]"] = m->getExtension(groupfile);
624                 string outputFileName = getOutputFileName("group", variables);
625         
626                 ofstream out;
627                 m->openOutputFile(outputFileName, out);
628
629                 ifstream in;
630                 m->openInputFile(groupfile, in);
631                 string name, group;
632                 
633                 bool wroteSomething = false;
634                 
635                 while(!in.eof()){
636                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
637                         
638                         in >> name;                             //read from first column
639                         in >> group;                    //read from second column
640                         
641                         //if this name is in the accnos file
642                         if (names.count(name) == 0) {
643                                 wroteSomething = true;
644                                 out << name << '\t' << group << endl;
645                         }
646                                         
647                         m->gobble(in);
648                 }
649                 in.close();
650                 out.close();
651                 
652                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
653                 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
654                 
655                 return 0;
656         }
657         catch(exception& e) {
658                 m->errorOut(e, "RemoveLineageCommand", "readGroup");
659                 exit(1);
660         }
661 }
662 //**********************************************************************************************************************
663 int RemoveLineageCommand::readTax(){
664         try {
665                 string thisOutputDir = outputDir;
666                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
667                 map<string, string> variables; 
668                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
669         variables["[extension]"] = m->getExtension(taxfile);
670                 string outputFileName = getOutputFileName("taxonomy", variables);
671                 ofstream out;
672                 m->openOutputFile(outputFileName, out);
673                 
674                 ifstream in;
675                 m->openInputFile(taxfile, in);
676                 string name, tax;
677                 
678                 bool wroteSomething = false;
679                 
680                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
681                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
682                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
683                 
684                 for (int i = 0; i < listOfTaxons.size(); i++) {
685                         noConfidenceTaxons[i] = listOfTaxons[i];
686                         int hasConPos = listOfTaxons[i].find_first_of('(');
687                         if (hasConPos != string::npos) {  
688                                 taxonsHasConfidence[i] = true; 
689                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
690                                 noConfidenceTaxons[i] = listOfTaxons[i];
691                                 m->removeConfidences(noConfidenceTaxons[i]);
692                         }
693                 }
694                 
695                 
696                 while(!in.eof()){
697
698                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
699
700                         in >> name;                             //read from first column
701                         in >> tax;                      //read from second column
702                         
703                         bool remove = false;
704                         
705             string noQuotesTax = m->removeQuotes(tax);
706             
707                         for (int j = 0; j < listOfTaxons.size(); j++) {
708                                 string newtax = noQuotesTax;
709                                 
710                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
711                                 if (!taxonsHasConfidence[j]) {
712                                         
713                                         int hasConfidences = noQuotesTax.find_first_of('(');
714                                         if (hasConfidences != string::npos) { 
715                                                 newtax = noQuotesTax;
716                                                 m->removeConfidences(newtax);
717                                         }
718                                         
719                                         int pos = newtax.find(noConfidenceTaxons[j]);
720                                         
721                                         if (pos == string::npos) { 
722                                                 //wroteSomething = true;
723                                                 //out << name << '\t' << tax << endl;
724                                         }else{ //this sequence contains the taxon the user wants to remove
725                                                 names.insert(name);
726                                                 remove=true; break;
727                                         }
728                                         
729                                 }else{//if taxons has them and you don't them remove taxons
730                                         int hasConfidences = noQuotesTax.find_first_of('(');
731                                         if (hasConfidences == string::npos) { 
732                                                 
733                                                 int pos = newtax.find(noConfidenceTaxons[j]);
734                                                 
735                                                 if (pos == string::npos) { 
736                                                         //wroteSomething = true;
737                                                         //out << name << '\t' << tax << endl;
738                                                 }else{ //this sequence contains the taxon the user wants to remove
739                                                         names.insert(name);
740                                                         remove=true; break;
741                                                 }
742                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
743                                                 //first remove confidences from both and see if the taxonomy exists
744                                                 
745                                                 string noNewTax = noQuotesTax;
746                                                 int hasConfidences = noQuotesTax.find_first_of('(');
747                                                 if (hasConfidences != string::npos) { 
748                                                         noNewTax = noQuotesTax;
749                                                         m->removeConfidences(noNewTax);
750                                                 }
751                                                 
752                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
753                                                 
754                                                 if (pos != string::npos) { //if yes, then are the confidences okay
755                                                         
756                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
757                                                         
758                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
759                                                         //we want to "line them up", so we will find the the index where the searchstring starts
760                                                         int index = 0;
761                                                         for (int i = 0; i < usersTaxon.size(); i++) {
762                                                                 
763                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
764                                                                         index = i;  
765                                                                         int spot = 0;
766                                                                         bool goodspot = true;
767                                                                         //is this really the start, or are we dealing with a taxon of the same name?
768                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
769                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
770                                                                                 else { spot++; }
771                                                                         }
772                                                                         
773                                                                         if (goodspot) { break; }
774                                                                 }
775                                                         }
776                                                         
777                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
778                                                                 
779                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
780                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
781                                                                                 remove = true;
782                                                                                 break;
783                                                                         }
784                                                                 }else {
785                                                                         remove = true;
786                                                                         break;
787                                                                 }
788                                                         }
789                                                         
790                                                         //passed the test so remove you
791                                                         if (remove) {
792                                                                 names.insert(name);
793                                                                 remove=true; break;
794                                                         }else {
795                                                                 //wroteSomething = true;
796                                                                 //out << name << '\t' << tax << endl;
797                                                         }
798                                                 }else {
799                                                         //wroteSomething = true;
800                                                         //out << name << '\t' << tax << endl;
801                                                 }
802                                         }
803                                 }
804                                 
805                         }
806                         
807                         if (!remove) {  wroteSomething = true; out << name << '\t' << tax << endl; }
808                         m->gobble(in);
809                 }
810                 in.close();
811                 out.close();
812                 
813                 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
814                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
815                         
816                 return 0;
817
818         }
819         catch(exception& e) {
820                 m->errorOut(e, "RemoveLineageCommand", "readTax");
821                 exit(1);
822         }
823 }
824 /**************************************************************************************************/
825 vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
826         try {
827                 
828                 vector< map<string, float> > t;
829                 string taxon = "";
830                 int taxLength = tax.length();
831                 for(int i=0;i<taxLength;i++){
832                         if(tax[i] == ';'){
833                 
834                                 int openParen = taxon.find_last_of('(');
835                                 int closeParen = taxon.find_last_of(')');
836                                 
837                                 string newtaxon, confidence;
838                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
839                     string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
840                     if (m->isNumeric1(confidenceScore)) {  //its a confidence
841                         newtaxon = taxon.substr(0, openParen); //rip off confidence
842                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
843                     }else { //its part of the taxon
844                         newtaxon = taxon;
845                         confidence = "0";
846                     }
847                                 }else{
848                                         newtaxon = taxon;
849                                         confidence = "0";
850                                 } 
851                                 float con = 0;
852                                 convert(confidence, con);
853                                 
854                                 map<string, float> temp;
855                                 temp[newtaxon] = con;
856                                 t.push_back(temp);
857                                 taxon = "";
858                         }
859                         else{
860                                 taxon += tax[i];
861                         }
862                 }
863                 
864                 return t;
865         }
866         catch(exception& e) {
867                 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
868                 exit(1);
869         }
870 }
871 //**********************************************************************************************************************
872 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
873 int RemoveLineageCommand::readAlign(){
874         try {
875                 string thisOutputDir = outputDir;
876                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
877         map<string, string> variables; 
878                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
879         variables["[extension]"] = m->getExtension(alignfile);
880                 string outputFileName = getOutputFileName("alignreport", variables);
881                 
882                 ofstream out;
883                 m->openOutputFile(outputFileName, out);
884
885                 ifstream in;
886                 m->openInputFile(alignfile, in);
887                 string name, junk;
888                 
889                 bool wroteSomething = false;
890                 
891                 //read column headers
892                 for (int i = 0; i < 16; i++) {  
893                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
894                         else                    {       break;                  }
895                 }
896                 out << endl;
897                 
898                 while(!in.eof()){
899                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
900                         
901                         in >> name;                             //read from first column
902                         
903                         //if this name is in the accnos file
904                         if (names.count(name) == 0) {
905                                 wroteSomething = true;
906                                 
907                                 out << name << '\t';
908                                 
909                                 //read rest
910                                 for (int i = 0; i < 15; i++) {  
911                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
912                                         else                    {       break;                  }
913                                 }
914                                 out << endl;
915                                 
916                         }else {//still read just don't do anything with it
917                                 
918                                 //read rest
919                                 for (int i = 0; i < 15; i++) {  
920                                         if (!in.eof())  {       in >> junk;             }
921                                         else                    {       break;                  }
922                                 }
923                         }
924                         
925                         m->gobble(in);
926                 }
927                 in.close();
928                 out.close();
929                 
930                 if (wroteSomething == false) {  m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
931                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
932                 
933                 return 0;
934                 
935         }
936         catch(exception& e) {
937                 m->errorOut(e, "RemoveLineageCommand", "readAlign");
938                 exit(1);
939         }
940 }
941 //**********************************************************************************************************************
942