]> git.donarmstrong.com Git - mothur.git/blob - removelineagecommand.cpp
fixes while testing 1.33.0
[mothur.git] / removelineagecommand.cpp
1 /*
2  *  removelineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14 #include "inputdata.h"
15
16 //**********************************************************************************************************************
17 vector<string> RemoveLineageCommand::setParameters(){   
18         try {
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
20         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
24         CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
25                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","taxonomy",false,false, true); parameters.push_back(ptaxonomy);
26         CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy);
27                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
28         CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
29                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true,true); parameters.push_back(ptaxon);
30                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
31                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
32                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33                 
34                 vector<string> myArray;
35                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
36                 return myArray;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "RemoveLineageCommand", "setParameters");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 string RemoveLineageCommand::getHelpString(){   
45         try {
46                 string helpString = "";
47                 helpString += "The remove.lineage command reads a taxonomy or constaxonomy file and any of the following file types: fasta, name, group, count, list, shared or alignreport file. The constaxonomy can only be used with a shared or list file.\n";
48                 helpString += "It outputs a file containing only the sequences or OTUS from the taxonomy file that are not from the taxon you requested to be removed.\n";
49                 helpString += "The remove.lineage command parameters are taxon, fasta, name, group, count, list, shared, taxonomy, alignreport, label and dups.  You must provide taxonomy or constaxonomy unless you have a valid current taxonomy file.\n";
50                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
51                 helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
52                 helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
53                 helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n";
54         helpString += "The label parameter is used to analyze specific labels in your input. \n";
55                 helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
56                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
57                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
58                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "RemoveLineageCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string RemoveLineageCommand::getOutputPattern(string type) {
69     try {
70         string pattern = "";
71         
72         if (type == "fasta")                {   pattern = "[filename],pick,[extension]";    }
73         else if (type == "taxonomy")        {   pattern = "[filename],pick,[extension]";    }
74         else if (type == "constaxonomy")    {   pattern = "[filename],pick,[extension]";    }
75         else if (type == "name")            {   pattern = "[filename],pick,[extension]";    }
76         else if (type == "group")           {   pattern = "[filename],pick,[extension]";    }
77         else if (type == "count")           {   pattern = "[filename],pick,[extension]";    }
78         else if (type == "list")            {   pattern = "[filename],[distance],pick,[extension]";    }
79         else if (type == "shared")          {   pattern = "[filename],[distance],pick,[extension]";    }
80         else if (type == "alignreport")     {   pattern = "[filename],pick.align.report";    }
81         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
82         
83         return pattern;
84     }
85     catch(exception& e) {
86         m->errorOut(e, "RemoveLineageCommand", "getOutputPattern");
87         exit(1);
88     }
89 }
90
91 //**********************************************************************************************************************
92 RemoveLineageCommand::RemoveLineageCommand(){   
93         try {
94                 abort = true; calledHelp = true; 
95                 setParameters();
96                 vector<string> tempOutNames;
97                 outputTypes["fasta"] = tempOutNames;
98                 outputTypes["taxonomy"] = tempOutNames;
99                 outputTypes["name"] = tempOutNames;
100                 outputTypes["group"] = tempOutNames;
101                 outputTypes["alignreport"] = tempOutNames;
102                 outputTypes["list"] = tempOutNames;
103         outputTypes["count"] = tempOutNames;
104         outputTypes["constaxonomy"] = tempOutNames;
105         outputTypes["shared"] = tempOutNames;
106
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
110                 exit(1);
111         }
112 }
113 //**********************************************************************************************************************
114 RemoveLineageCommand::RemoveLineageCommand(string option)  {
115         try {
116                 abort = false; calledHelp = false;   
117                                 
118                 //allow user to run help
119                 if(option == "help") { help(); abort = true; calledHelp = true; }
120                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
121                 
122                 else {
123                         vector<string> myArray = setParameters();       
124                         
125                         OptionParser parser(option);
126                         map<string,string> parameters = parser.getParameters();
127                         
128                         ValidParameters validParameter;
129                         map<string,string>::iterator it;
130                         
131                         //check to make sure all parameters are valid for command
132                         for (it = parameters.begin(); it != parameters.end(); it++) { 
133                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
134                         }
135                         
136                         //initialize outputTypes
137                         vector<string> tempOutNames;
138                         outputTypes["fasta"] = tempOutNames;
139                         outputTypes["taxonomy"] = tempOutNames;
140                         outputTypes["name"] = tempOutNames;
141                         outputTypes["group"] = tempOutNames;
142                         outputTypes["alignreport"] = tempOutNames;
143                         outputTypes["list"] = tempOutNames;
144             outputTypes["count"] = tempOutNames;
145             outputTypes["constaxonomy"] = tempOutNames;
146             outputTypes["shared"] = tempOutNames;
147
148                         
149                         //if the user changes the output directory command factory will send this info to us in the output parameter 
150                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("alignreport");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
163                                 }
164                                 
165                                 it = parameters.find("fasta");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
171                                 }
172                                 
173                                 it = parameters.find("list");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                                 it = parameters.find("group");
190                                 //user has given a template file
191                                 if(it != parameters.end()){ 
192                                         path = m->hasPath(it->second);
193                                         //if the user has not given a path then, add inputdir. else leave path alone.
194                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
195                                 }
196                                 
197                                 it = parameters.find("taxonomy");
198                                 //user has given a template file
199                                 if(it != parameters.end()){ 
200                                         path = m->hasPath(it->second);
201                                         //if the user has not given a path then, add inputdir. else leave path alone.
202                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
203                                 }
204                 
205                 it = parameters.find("count");
206                                 //user has given a template file
207                                 if(it != parameters.end()){ 
208                                         path = m->hasPath(it->second);
209                                         //if the user has not given a path then, add inputdir. else leave path alone.
210                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
211                                 }
212                 
213                 it = parameters.find("constaxonomy");
214                                 //user has given a template file
215                                 if(it != parameters.end()){
216                                         path = m->hasPath(it->second);
217                                         //if the user has not given a path then, add inputdir. else leave path alone.
218                                         if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
219                                 }
220                 
221                 it = parameters.find("shared");
222                                 //user has given a template file
223                                 if(it != parameters.end()){
224                                         path = m->hasPath(it->second);
225                                         //if the user has not given a path then, add inputdir. else leave path alone.
226                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
227                                 }
228                         }
229
230                         
231                         //check for required parameters                 
232                         fastafile = validParameter.validFile(parameters, "fasta", true);
233                         if (fastafile == "not open") { fastafile = ""; abort = true; }
234                         else if (fastafile == "not found") {  fastafile = "";  }        
235                         else { m->setFastaFile(fastafile); }
236                         
237                         namefile = validParameter.validFile(parameters, "name", true);
238                         if (namefile == "not open") { namefile = ""; abort = true; }
239                         else if (namefile == "not found") {  namefile = "";  }  
240                         else { m->setNameFile(namefile); }
241                         
242                         groupfile = validParameter.validFile(parameters, "group", true);
243                         if (groupfile == "not open") { abort = true; }
244                         else if (groupfile == "not found") {  groupfile = "";  }        
245                         else { m->setGroupFile(groupfile); }
246                         
247                         alignfile = validParameter.validFile(parameters, "alignreport", true);
248                         if (alignfile == "not open") { abort = true; }
249                         else if (alignfile == "not found") {  alignfile = "";  }
250                         
251                         listfile = validParameter.validFile(parameters, "list", true);
252                         if (listfile == "not open") { abort = true; }
253                         else if (listfile == "not found") {  listfile = "";  }
254                         else { m->setListFile(listfile); }
255                         
256                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
257                         if (taxfile == "not open") { taxfile = ""; abort = true; }
258                         else if (taxfile == "not found") {  taxfile = "";               }
259                         else { m->setTaxonomyFile(taxfile); }
260             
261             sharedfile = validParameter.validFile(parameters, "shared", true);
262                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
263                         else if (sharedfile == "not found") {  sharedfile = "";         }
264                         else { m->setSharedFile(sharedfile); }
265             
266             
267             constaxonomy = validParameter.validFile(parameters, "constaxonomy", true);
268                         if (constaxonomy == "not open") { constaxonomy = ""; abort = true; }
269                         else if (constaxonomy == "not found") {  constaxonomy = "";             }
270             
271             if ((constaxonomy == "") && (taxfile == "")) {
272                 taxfile = m->getTaxonomyFile();
273                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
274                 else {
275                     m->mothurOut("You have no current taxonomy file and did not provide a constaxonomy file. The taxonomy or constaxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
276                         }
277
278                         
279             countfile = validParameter.validFile(parameters, "count", true);
280             if (countfile == "not open") { countfile = ""; abort = true; }
281             else if (countfile == "not found") { countfile = "";  }     
282             else { m->setCountTableFile(countfile); }
283             
284             if ((namefile != "") && (countfile != "")) {
285                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
286             }
287             
288             if ((groupfile != "") && (countfile != "")) {
289                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
290             }
291             
292                         string usedDups = "true";
293                         string temp = validParameter.validFile(parameters, "dups", false);      
294                         if (temp == "not found") { 
295                                 if (namefile != "") {  temp = "true";                                   }
296                                 else                            {  temp = "false"; usedDups = "";       }
297                         }
298                         dups = m->isTrue(temp);
299                         
300                         taxons = validParameter.validFile(parameters, "taxon", false);  
301                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
302                         else { 
303                                 //rip off quotes
304                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
305                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
306                         }
307                         m->splitAtChar(taxons, listOfTaxons, '-');
308                         
309                         if ((fastafile == "") && (constaxonomy == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, constaxonomy, shared or listfile."); m->mothurOutEndLine(); abort = true; }
310             
311             if ((constaxonomy != "") && ((fastafile != "") || (namefile != "") || (groupfile != "") || (alignfile != "") || (taxfile != "") || (countfile != ""))) {
312                 m->mothurOut("[ERROR]: can only use constaxonomy file with a list or shared file, aborting.\n");  abort = true;
313             }
314             
315             if ((constaxonomy != "") && (taxfile != "")) {
316                 m->mothurOut("[ERROR]: Choose only one: taxonomy or constaxonomy, aborting.\n"); abort = true;
317             }
318             
319             if ((sharedfile != "") && (taxfile != "")) {
320                 m->mothurOut("[ERROR]: sharedfile can only be used with constaxonomy file, aborting.\n"); abort = true;
321             }
322             
323             if ((sharedfile != "") || (listfile != "")) {
324                 label = validParameter.validFile(parameters, "label", false);
325                 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
326             }
327
328                 
329                         if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
330                         
331                         if (countfile == "") {
332                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
333                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
334                     parser.getNameFile(files);
335                 }
336             }
337                         
338                 }
339
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
343                 exit(1);
344         }
345 }
346 //**********************************************************************************************************************
347
348 int RemoveLineageCommand::execute(){
349         try {
350                 
351                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
352                 
353                 if (m->control_pressed) { return 0; }
354         
355         if (countfile != "") {
356             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
357                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
358             }
359         }
360                 
361                 //read through the correct file and output lines you want to keep
362                 if (taxfile != "")                      {
363             readTax(); //fills the set of names to get
364             if (namefile != "")                 {               readName();             }
365             if (fastafile != "")                {               readFasta();    }
366             if (countfile != "")                {               readCount();    }
367             if (groupfile != "")                {               readGroup();    }
368             if (alignfile != "")                {               readAlign();    }
369             if (listfile != "")                 {               readList();             }
370             
371         }else {
372             readConsTax();
373             if (listfile != "")                 {               readConsList();         }
374             if (sharedfile != "")               {               readShared();           }
375         }
376                 
377                 
378                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
379                 
380                 if (outputNames.size() != 0) {
381                         m->mothurOutEndLine();
382                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
383                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
384                         m->mothurOutEndLine();
385                         
386                         //set fasta file as new current fastafile
387                         string current = "";
388                         itTypes = outputTypes.find("fasta");
389                         if (itTypes != outputTypes.end()) {
390                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
391                         }
392                         
393                         itTypes = outputTypes.find("name");
394                         if (itTypes != outputTypes.end()) {
395                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
396                         }
397                         
398                         itTypes = outputTypes.find("group");
399                         if (itTypes != outputTypes.end()) {
400                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
401                         }
402                         
403                         itTypes = outputTypes.find("list");
404                         if (itTypes != outputTypes.end()) {
405                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
406                         }
407                         
408                         itTypes = outputTypes.find("taxonomy");
409                         if (itTypes != outputTypes.end()) {
410                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
411                         }
412             
413             itTypes = outputTypes.find("count");
414                         if (itTypes != outputTypes.end()) {
415                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
416                         }
417                 }
418                 
419                 return 0;               
420         }
421
422         catch(exception& e) {
423                 m->errorOut(e, "RemoveLineageCommand", "execute");
424                 exit(1);
425         }
426 }
427
428 //**********************************************************************************************************************
429 int RemoveLineageCommand::readFasta(){
430         try {
431                 string thisOutputDir = outputDir;
432                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
433                 map<string, string> variables; 
434         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
435         variables["[extension]"] = m->getExtension(fastafile);
436                 string outputFileName = getOutputFileName("fasta", variables);  
437                 ofstream out;
438                 m->openOutputFile(outputFileName, out);
439                 
440                 ifstream in;
441                 m->openInputFile(fastafile, in);
442                 string name;
443                 
444                 bool wroteSomething = false;
445                 
446                 while(!in.eof()){
447                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
448                         
449                         Sequence currSeq(in);
450                         name = currSeq.getName();
451                         
452                         if (name != "") {
453                                 //if this name is in the accnos file
454                                 if (names.count(name) == 0) {
455                                         wroteSomething = true;
456                                         
457                                         currSeq.printSequence(out);
458                                 }
459                         }
460                         m->gobble(in);
461                 }
462                 in.close();     
463                 out.close();
464                 
465                 if (wroteSomething == false) {  m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
466                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); 
467                 
468                 return 0;
469                 
470         }
471         catch(exception& e) {
472                 m->errorOut(e, "RemoveLineageCommand", "readFasta");
473                 exit(1);
474         }
475 }
476 //**********************************************************************************************************************
477 int RemoveLineageCommand::readList(){
478         try {
479                 string thisOutputDir = outputDir;
480                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
481                 map<string, string> variables; 
482         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
483         variables["[extension]"] = m->getExtension(listfile);
484                 
485                 ifstream in;
486                 m->openInputFile(listfile, in);
487                 
488                 bool wroteSomething = false;
489                 
490                 while(!in.eof()){
491             
492                         //read in list vector
493                         ListVector list(in);
494                         
495                         //make a new list vector
496                         ListVector newList;
497                         newList.setLabel(list.getLabel());
498             
499             variables["[distance]"] = list.getLabel();
500             string outputFileName = getOutputFileName("list", variables);
501                         
502                         ofstream out;
503                         m->openOutputFile(outputFileName, out);
504                         outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
505             
506             if (m->control_pressed) { in.close(); out.close(); return 0; }
507             
508             vector<string> binLabels = list.getLabels();
509             vector<string> newBinLabels;
510                         
511                         //for each bin
512                         for (int i = 0; i < list.getNumBins(); i++) {
513                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
514                         
515                                 //parse out names that are in accnos file
516                                 string binnames = list.get(i);
517                 vector<string> bnames;
518                 m->splitAtComma(binnames, bnames);
519                                 
520                                 string newNames = "";
521                 for (int j = 0; j < bnames.size(); j++) {
522                                         string name = bnames[j];
523                                         //if that name is in the .accnos file, add it
524                                         if (names.count(name) == 0) {  newNames += name + ",";  }
525                                 }
526
527                                 //if there are names in this bin add to new list
528                                 if (newNames != "") {  
529                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
530                                         newList.push_back(newNames);
531                     newBinLabels.push_back(binLabels[i]);
532                                 }
533                         }
534                                 
535                         //print new listvector
536                         if (newList.getNumBins() != 0) {
537                                 wroteSomething = true;
538                                 newList.setLabels(newBinLabels);
539                 newList.printHeaders(out);
540                                 newList.print(out);
541                         }
542                         
543                         m->gobble(in);
544             out.close();
545                 }
546                 in.close();     
547                 
548                 
549                 if (wroteSomething == false) {  m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
550                                 
551                 return 0;
552
553         }
554         catch(exception& e) {
555                 m->errorOut(e, "RemoveLineageCommand", "readList");
556                 exit(1);
557         }
558 }
559 //**********************************************************************************************************************
560 int RemoveLineageCommand::readName(){
561         try {
562                 string thisOutputDir = outputDir;
563                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
564                 map<string, string> variables; 
565                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
566         variables["[extension]"] = m->getExtension(namefile);
567                 string outputFileName = getOutputFileName("name", variables);
568                 ofstream out;
569                 m->openOutputFile(outputFileName, out);
570
571                 ifstream in;
572                 m->openInputFile(namefile, in);
573                 string name, firstCol, secondCol;
574                 
575                 bool wroteSomething = false;
576                 
577                 while(!in.eof()){
578                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
579
580                         in >> firstCol;                         
581                         in >> secondCol;                        
582
583                         vector<string> parsedNames;
584                         m->splitAtComma(secondCol, parsedNames);
585                         
586                         vector<string> validSecond;  validSecond.clear();
587                         for (int i = 0; i < parsedNames.size(); i++) {
588                                 if (names.count(parsedNames[i]) == 0) {
589                                         validSecond.push_back(parsedNames[i]);
590                                 }
591                         }
592                         
593                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
594                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
595                         }else {
596                                         //if the name in the first column is in the set then print it and any other names in second column also in set
597                                 if (names.count(firstCol) == 0) {
598                                         
599                                         wroteSomething = true;
600                                         
601                                         out << firstCol << '\t';
602                                         
603                                         //you know you have at least one valid second since first column is valid
604                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
605                                         out << validSecond[validSecond.size()-1] << endl;
606                                         
607                                         //make first name in set you come to first column and then add the remaining names to second column
608                                 }else {
609                                         
610                                         //you want part of this row
611                                         if (validSecond.size() != 0) {
612                                                 
613                                                 wroteSomething = true;
614                                                 
615                                                 out << validSecond[0] << '\t';
616                                                 
617                                                 //you know you have at least one valid second since first column is valid
618                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
619                                                 out << validSecond[validSecond.size()-1] << endl;
620                                         }
621                                 }
622                         }
623                         m->gobble(in);
624                 }
625                 in.close();
626                 out.close();
627
628                 if (wroteSomething == false) {  m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
629                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
630                                 
631                 return 0;
632         }
633         catch(exception& e) {
634                 m->errorOut(e, "RemoveLineageCommand", "readName");
635                 exit(1);
636         }
637 }
638 //**********************************************************************************************************************
639 int RemoveLineageCommand::readCount(){
640         try {
641                 string thisOutputDir = outputDir;
642                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
643                 map<string, string> variables; 
644                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
645         variables["[extension]"] = m->getExtension(countfile);
646                 string outputFileName = getOutputFileName("count", variables);
647
648                 
649                 ofstream out;
650                 m->openOutputFile(outputFileName, out);
651                 
652                 ifstream in;
653                 m->openInputFile(countfile, in);
654                 
655                 bool wroteSomething = false;
656                 
657         string headers = m->getline(in); m->gobble(in);
658         out << headers << endl;
659         
660         string name, rest; int thisTotal;
661         while (!in.eof()) {
662             
663             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
664             
665             in >> name; m->gobble(in); 
666             in >> thisTotal; m->gobble(in);
667             rest = m->getline(in); m->gobble(in);
668             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
669             
670             if (names.count(name) == 0) {
671                 out << name << '\t' << thisTotal << '\t' << rest << endl;
672                 wroteSomething = true;
673             }
674         }
675         in.close();
676                 out.close();
677         
678         //check for groups that have been eliminated
679         CountTable ct;
680         if (ct.testGroups(outputFileName)) {
681             ct.readTable(outputFileName, true, false);
682             ct.printTable(outputFileName);
683         }
684                 
685                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
686                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
687         
688                 return 0;
689         }
690         catch(exception& e) {
691                 m->errorOut(e, "RemoveLineageCommand", "readCount");
692                 exit(1);
693         }
694 }
695 //**********************************************************************************************************************
696 int RemoveLineageCommand::readConsList(){
697         try {
698                 getListVector();
699         
700         if (m->control_pressed) { delete list; return 0;}
701         
702         ListVector newList;
703         newList.setLabel(list->getLabel());
704         int removedCount = 0;
705         bool wroteSomething = false;
706         string snumBins = toString(list->getNumBins());
707         
708         vector<string> binLabels = list->getLabels();
709         vector<string> newBinLabels;
710         for (int i = 0; i < list->getNumBins(); i++) {
711             
712             if (m->control_pressed) { delete list; return 0;}
713             
714             //create a label for this otu
715             string otuLabel = "Otu";
716             string sbinNumber = toString(i+1);
717             if (sbinNumber.length() < snumBins.length()) {
718                 int diff = snumBins.length() - sbinNumber.length();
719                 for (int h = 0; h < diff; h++) { otuLabel += "0"; }
720             }
721             otuLabel += sbinNumber;
722             
723             if (names.count(m->getSimpleLabel(otuLabel)) == 0) {
724                 newList.push_back(list->get(i));
725                 newBinLabels.push_back(binLabels[i]);
726             }else { removedCount++; }
727         }
728         
729         string thisOutputDir = outputDir;
730                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
731         map<string, string> variables;
732                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
733         variables["[extension]"] = m->getExtension(listfile);
734         variables["[distance]"] = list->getLabel();
735                 string outputFileName = getOutputFileName("list", variables);
736                 ofstream out;
737                 m->openOutputFile(outputFileName, out);
738         
739                 delete list;
740         //print new listvector
741         if (newList.getNumBins() != 0) {
742             wroteSomething = true;
743             newList.setLabels(newBinLabels);
744             newList.printHeaders(out);
745             newList.print(out);
746         }
747                 out.close();
748                 
749                 if (wroteSomething == false) { m->mothurOut("Your file only contains OTUs from " + taxons + "."); m->mothurOutEndLine();  }
750                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
751                 
752                 m->mothurOut("Removed " + toString(removedCount) + " OTUs from your list file."); m->mothurOutEndLine();
753         
754                 return 0;
755         
756         }
757         catch(exception& e) {
758                 m->errorOut(e, "RemoveLineageCommand", "readConsList");
759                 exit(1);
760         }
761 }
762 //**********************************************************************************************************************
763 int RemoveLineageCommand::getListVector(){
764         try {
765                 InputData input(listfile, "list");
766                 list = input.getListVector();
767                 string lastLabel = list->getLabel();
768                 
769                 if (label == "") { label = lastLabel;  return 0; }
770                 
771                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
772                 set<string> labels; labels.insert(label);
773                 set<string> processedLabels;
774                 set<string> userLabels = labels;
775                 
776                 //as long as you are not at the end of the file or done wih the lines you want
777                 while((list != NULL) && (userLabels.size() != 0)) {
778                         if (m->control_pressed) {  return 0;  }
779                         
780                         if(labels.count(list->getLabel()) == 1){
781                                 processedLabels.insert(list->getLabel());
782                                 userLabels.erase(list->getLabel());
783                                 break;
784                         }
785                         
786                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
787                                 string saveLabel = list->getLabel();
788                                 
789                                 delete list;
790                                 list = input.getListVector(lastLabel);
791                                 
792                                 processedLabels.insert(list->getLabel());
793                                 userLabels.erase(list->getLabel());
794                                 
795                                 //restore real lastlabel to save below
796                                 list->setLabel(saveLabel);
797                                 break;
798                         }
799                         
800                         lastLabel = list->getLabel();
801                         
802                         //get next line to process
803                         //prevent memory leak
804                         delete list;
805                         list = input.getListVector();
806                 }
807                 
808                 
809                 if (m->control_pressed) {  return 0;  }
810                 
811                 //output error messages about any remaining user labels
812                 set<string>::iterator it;
813                 bool needToRun = false;
814                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
815                         m->mothurOut("Your file does not include the label " + *it);
816                         if (processedLabels.count(lastLabel) != 1) {
817                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
818                                 needToRun = true;
819                         }else {
820                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
821                         }
822                 }
823                 
824                 //run last label if you need to
825                 if (needToRun == true)  {
826                         delete list;
827                         list = input.getListVector(lastLabel);
828                 }
829                 
830                 return 0;
831         }
832         catch(exception& e) {
833                 m->errorOut(e, "RemoveLineageCommand", "getListVector");
834                 exit(1);
835         }
836 }
837
838 //**********************************************************************************************************************
839 int RemoveLineageCommand::readShared(){
840         try {
841         
842         getShared();
843         
844         if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
845         
846         vector<string> newLabels;
847         
848         //create new "filtered" lookup
849         vector<SharedRAbundVector*> newLookup;
850         for (int i = 0; i < lookup.size(); i++) {
851             SharedRAbundVector* temp = new SharedRAbundVector();
852                         temp->setLabel(lookup[i]->getLabel());
853                         temp->setGroup(lookup[i]->getGroup());
854                         newLookup.push_back(temp);
855         }
856         
857         bool wroteSomething = false;
858         int numRemoved = 0;
859         for (int i = 0; i < lookup[0]->getNumBins(); i++) {
860             
861             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
862             
863             //is this otu on the list
864             if (names.count(m->getSimpleLabel(m->currentSharedBinLabels[i])) == 0) {
865                 wroteSomething = true;
866                 newLabels.push_back(m->currentSharedBinLabels[i]);
867                 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
868                     newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
869                 }
870             }else { numRemoved++; }
871         }
872         
873         string thisOutputDir = outputDir;
874                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
875         map<string, string> variables;
876                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
877         variables["[extension]"] = m->getExtension(sharedfile);
878         variables["[distance]"] = lookup[0]->getLabel();
879                 string outputFileName = getOutputFileName("shared", variables);
880         ofstream out;
881                 m->openOutputFile(outputFileName, out);
882                 outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
883         
884                 for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
885         
886         m->currentSharedBinLabels = newLabels;
887         
888                 newLookup[0]->printHeaders(out);
889                 
890                 for (int i = 0; i < newLookup.size(); i++) {
891                         out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t';
892                         newLookup[i]->print(out);
893                 }
894                 out.close();
895         
896         for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
897         
898         if (wroteSomething == false) { m->mothurOut("Your file only contains OTUs from " + taxons + "."); m->mothurOutEndLine();  }
899         
900                 m->mothurOut("Removed " + toString(numRemoved) + " OTUs from your shared file."); m->mothurOutEndLine();
901         
902         return 0;
903     }
904         catch(exception& e) {
905                 m->errorOut(e, "RemoveLineageCommand", "readShared");
906                 exit(1);
907         }
908 }
909 //**********************************************************************************************************************
910 int RemoveLineageCommand::getShared(){
911         try {
912                 InputData input(sharedfile, "sharedfile");
913                 lookup = input.getSharedRAbundVectors();
914                 string lastLabel = lookup[0]->getLabel();
915                 
916                 if (label == "") { label = lastLabel;  return 0; }
917                 
918                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
919                 set<string> labels; labels.insert(label);
920                 set<string> processedLabels;
921                 set<string> userLabels = labels;
922                 
923                 //as long as you are not at the end of the file or done wih the lines you want
924                 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
925                         if (m->control_pressed) {   return 0;  }
926                         
927                         if(labels.count(lookup[0]->getLabel()) == 1){
928                                 processedLabels.insert(lookup[0]->getLabel());
929                                 userLabels.erase(lookup[0]->getLabel());
930                                 break;
931                         }
932                         
933                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
934                                 string saveLabel = lookup[0]->getLabel();
935                                 
936                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
937                                 lookup = input.getSharedRAbundVectors(lastLabel);
938                                 
939                                 processedLabels.insert(lookup[0]->getLabel());
940                                 userLabels.erase(lookup[0]->getLabel());
941                                 
942                                 //restore real lastlabel to save below
943                                 lookup[0]->setLabel(saveLabel);
944                                 break;
945                         }
946                         
947                         lastLabel = lookup[0]->getLabel();
948                         
949                         //get next line to process
950                         //prevent memory leak
951                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
952                         lookup = input.getSharedRAbundVectors();
953                 }
954                 
955                 
956                 if (m->control_pressed) {  return 0;  }
957                 
958                 //output error messages about any remaining user labels
959                 set<string>::iterator it;
960                 bool needToRun = false;
961                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
962                         m->mothurOut("Your file does not include the label " + *it);
963                         if (processedLabels.count(lastLabel) != 1) {
964                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
965                                 needToRun = true;
966                         }else {
967                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
968                         }
969                 }
970                 
971                 //run last label if you need to
972                 if (needToRun == true)  {
973                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } }
974                         lookup = input.getSharedRAbundVectors(lastLabel);
975                 }
976                 
977                 return 0;
978         }
979         catch(exception& e) {
980                 m->errorOut(e, "RemoveLineageCommand", "getShared");
981                 exit(1);
982         }
983 }
984
985
986 //**********************************************************************************************************************
987 int RemoveLineageCommand::readGroup(){
988         try {
989                 string thisOutputDir = outputDir;
990                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
991                 map<string, string> variables; 
992                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
993         variables["[extension]"] = m->getExtension(groupfile);
994                 string outputFileName = getOutputFileName("group", variables);
995         
996                 ofstream out;
997                 m->openOutputFile(outputFileName, out);
998
999                 ifstream in;
1000                 m->openInputFile(groupfile, in);
1001                 string name, group;
1002                 
1003                 bool wroteSomething = false;
1004                 
1005                 while(!in.eof()){
1006                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1007                         
1008                         in >> name;                             //read from first column
1009                         in >> group;                    //read from second column
1010                         
1011                         //if this name is in the accnos file
1012                         if (names.count(name) == 0) {
1013                                 wroteSomething = true;
1014                                 out << name << '\t' << group << endl;
1015                         }
1016                                         
1017                         m->gobble(in);
1018                 }
1019                 in.close();
1020                 out.close();
1021                 
1022                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
1023                 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
1024                 
1025                 return 0;
1026         }
1027         catch(exception& e) {
1028                 m->errorOut(e, "RemoveLineageCommand", "readGroup");
1029                 exit(1);
1030         }
1031 }
1032 //**********************************************************************************************************************
1033 int RemoveLineageCommand::readTax(){
1034         try {
1035                 string thisOutputDir = outputDir;
1036                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1037                 map<string, string> variables; 
1038                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1039         variables["[extension]"] = m->getExtension(taxfile);
1040                 string outputFileName = getOutputFileName("taxonomy", variables);
1041                 ofstream out;
1042                 m->openOutputFile(outputFileName, out);
1043                 
1044                 ifstream in;
1045                 m->openInputFile(taxfile, in);
1046                 string name, tax;
1047                 
1048                 bool wroteSomething = false;
1049                 
1050                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1051                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1052                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1053                 
1054                 for (int i = 0; i < listOfTaxons.size(); i++) {
1055                         noConfidenceTaxons[i] = listOfTaxons[i];
1056                         int hasConPos = listOfTaxons[i].find_first_of('(');
1057                         if (hasConPos != string::npos) {  
1058                                 taxonsHasConfidence[i] = true; 
1059                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
1060                                 noConfidenceTaxons[i] = listOfTaxons[i];
1061                                 m->removeConfidences(noConfidenceTaxons[i]);
1062                         }
1063                 }
1064                 
1065                 
1066                 while(!in.eof()){
1067
1068                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1069
1070                         in >> name;                             //read from first column
1071                         in >> tax;                      //read from second column
1072                         
1073                         bool remove = false;
1074                         
1075             string noQuotesTax = m->removeQuotes(tax);
1076             
1077                         for (int j = 0; j < listOfTaxons.size(); j++) {
1078                                 string newtax = noQuotesTax;
1079                                 
1080                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1081                                 if (!taxonsHasConfidence[j]) {
1082                                         
1083                                         int hasConfidences = noQuotesTax.find_first_of('(');
1084                                         if (hasConfidences != string::npos) { 
1085                                                 newtax = noQuotesTax;
1086                                                 m->removeConfidences(newtax);
1087                                         }
1088                                         
1089                                         int pos = newtax.find(noConfidenceTaxons[j]);
1090                                         
1091                                         if (pos == string::npos) { 
1092                                                 //wroteSomething = true;
1093                                                 //out << name << '\t' << tax << endl;
1094                                         }else{ //this sequence contains the taxon the user wants to remove
1095                                                 names.insert(name);
1096                                                 remove=true; break;
1097                                         }
1098                                         
1099                                 }else{//if taxons has them and you don't them remove taxons
1100                                         int hasConfidences = noQuotesTax.find_first_of('(');
1101                                         if (hasConfidences == string::npos) { 
1102                                                 
1103                                                 int pos = newtax.find(noConfidenceTaxons[j]);
1104                                                 
1105                                                 if (pos == string::npos) { 
1106                                                         //wroteSomething = true;
1107                                                         //out << name << '\t' << tax << endl;
1108                                                 }else{ //this sequence contains the taxon the user wants to remove
1109                                                         names.insert(name);
1110                                                         remove=true; break;
1111                                                 }
1112                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1113                                                 //first remove confidences from both and see if the taxonomy exists
1114                                                 
1115                                                 string noNewTax = noQuotesTax;
1116                                                 int hasConfidences = noQuotesTax.find_first_of('(');
1117                                                 if (hasConfidences != string::npos) { 
1118                                                         noNewTax = noQuotesTax;
1119                                                         m->removeConfidences(noNewTax);
1120                                                 }
1121                                                 
1122                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
1123                                                 
1124                                                 if (pos != string::npos) { //if yes, then are the confidences okay
1125                                                         
1126                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
1127                                                         
1128                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1129                                                         //we want to "line them up", so we will find the the index where the searchstring starts
1130                                                         int index = 0;
1131                                                         for (int i = 0; i < usersTaxon.size(); i++) {
1132                                                                 
1133                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
1134                                                                         index = i;  
1135                                                                         int spot = 0;
1136                                                                         bool goodspot = true;
1137                                                                         //is this really the start, or are we dealing with a taxon of the same name?
1138                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1139                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1140                                                                                 else { spot++; }
1141                                                                         }
1142                                                                         
1143                                                                         if (goodspot) { break; }
1144                                                                 }
1145                                                         }
1146                                                         
1147                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
1148                                                                 
1149                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1150                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1151                                                                                 remove = true;
1152                                                                                 break;
1153                                                                         }
1154                                                                 }else {
1155                                                                         remove = true;
1156                                                                         break;
1157                                                                 }
1158                                                         }
1159                                                         
1160                                                         //passed the test so remove you
1161                                                         if (remove) {
1162                                                                 names.insert(name);
1163                                                                 remove=true; break;
1164                                                         }else {
1165                                                                 //wroteSomething = true;
1166                                                                 //out << name << '\t' << tax << endl;
1167                                                         }
1168                                                 }else {
1169                                                         //wroteSomething = true;
1170                                                         //out << name << '\t' << tax << endl;
1171                                                 }
1172                                         }
1173                                 }
1174                                 
1175                         }
1176                         
1177                         if (!remove) {  wroteSomething = true; out << name << '\t' << tax << endl; }
1178                         m->gobble(in);
1179                 }
1180                 in.close();
1181                 out.close();
1182                 
1183                 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
1184                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
1185                         
1186                 return 0;
1187
1188         }
1189         catch(exception& e) {
1190                 m->errorOut(e, "RemoveLineageCommand", "readTax");
1191                 exit(1);
1192         }
1193 }
1194 //**********************************************************************************************************************
1195 int RemoveLineageCommand::readConsTax(){
1196         try {
1197                 string thisOutputDir = outputDir;
1198                 if (outputDir == "") {  thisOutputDir += m->hasPath(constaxonomy);  }
1199                 map<string, string> variables;
1200                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomy));
1201         variables["[extension]"] = m->getExtension(constaxonomy);
1202                 string outputFileName = getOutputFileName("constaxonomy", variables);
1203                 ofstream out;
1204                 m->openOutputFile(outputFileName, out);
1205                 
1206                 ifstream in;
1207                 m->openInputFile(constaxonomy, in);
1208                 string otuLabel, tax;
1209         int numReps;
1210         bool wroteSomething = false;
1211         
1212         //read headers
1213         string headers = m->getline(in);
1214         out << headers << endl;
1215                 
1216                 //bool wroteSomething = false;
1217                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1218                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1219                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1220                 
1221                 for (int i = 0; i < listOfTaxons.size(); i++) {
1222                         noConfidenceTaxons[i] = listOfTaxons[i];
1223                         int hasConPos = listOfTaxons[i].find_first_of('(');
1224                         if (hasConPos != string::npos) {
1225                                 taxonsHasConfidence[i] = true;
1226                                 searchTaxons[i] = getTaxons(listOfTaxons[i]);
1227                                 noConfidenceTaxons[i] = listOfTaxons[i];
1228                                 m->removeConfidences(noConfidenceTaxons[i]);
1229                         }
1230                 }
1231                 
1232         
1233                 while(!in.eof()){
1234             
1235                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1236             
1237                         in >> otuLabel;                 m->gobble(in);
1238             in >> numReps;          m->gobble(in);
1239                         in >> tax;              m->gobble(in);
1240                         
1241             bool remove = false;
1242                         
1243             string noQuotesTax = m->removeQuotes(tax);
1244             
1245                         for (int j = 0; j < listOfTaxons.size(); j++) {
1246                                 string newtax = noQuotesTax;
1247                                 
1248                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1249                                 if (!taxonsHasConfidence[j]) {
1250                                         
1251                                         int hasConfidences = noQuotesTax.find_first_of('(');
1252                                         if (hasConfidences != string::npos) {
1253                                                 newtax = noQuotesTax;
1254                                                 m->removeConfidences(newtax);
1255                                         }
1256                                         
1257                                         int pos = newtax.find(noConfidenceTaxons[j]);
1258                                         
1259                                         if (pos == string::npos) {
1260                                                 //wroteSomething = true;
1261                                                 //out << name << '\t' << tax << endl;
1262                                         }else{ //this sequence contains the taxon the user wants to remove
1263                                                 names.insert(m->getSimpleLabel(otuLabel));
1264                                                 remove=true; break;
1265                                         }
1266                                         
1267                                 }else{//if taxons has them and you don't them remove taxons
1268                                         int hasConfidences = noQuotesTax.find_first_of('(');
1269                                         if (hasConfidences == string::npos) {
1270                                                 
1271                                                 int pos = newtax.find(noConfidenceTaxons[j]);
1272                                                 
1273                                                 if (pos == string::npos) {
1274                                                         //wroteSomething = true;
1275                                                         //out << name << '\t' << tax << endl;
1276                                                 }else{ //this sequence contains the taxon the user wants to remove
1277                                                         names.insert(m->getSimpleLabel(otuLabel));
1278                                                         remove=true; break;
1279                                                 }
1280                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1281                                                 //first remove confidences from both and see if the taxonomy exists
1282                                                 
1283                                                 string noNewTax = noQuotesTax;
1284                                                 int hasConfidences = noQuotesTax.find_first_of('(');
1285                                                 if (hasConfidences != string::npos) {
1286                                                         noNewTax = noQuotesTax;
1287                                                         m->removeConfidences(noNewTax);
1288                                                 }
1289                                                 
1290                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
1291                                                 
1292                                                 if (pos != string::npos) { //if yes, then are the confidences okay
1293                                                         
1294                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
1295                                                         
1296                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1297                                                         //we want to "line them up", so we will find the the index where the searchstring starts
1298                                                         int index = 0;
1299                                                         for (int i = 0; i < usersTaxon.size(); i++) {
1300                                                                 
1301                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
1302                                                                         index = i;
1303                                                                         int spot = 0;
1304                                                                         bool goodspot = true;
1305                                                                         //is this really the start, or are we dealing with a taxon of the same name?
1306                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1307                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1308                                                                                 else { spot++; }
1309                                                                         }
1310                                                                         
1311                                                                         if (goodspot) { break; }
1312                                                                 }
1313                                                         }
1314                                                         
1315                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
1316                                                                 
1317                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1318                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1319                                                                                 remove = true;
1320                                                                                 break;
1321                                                                         }
1322                                                                 }else {
1323                                                                         remove = true;
1324                                                                         break;
1325                                                                 }
1326                                                         }
1327                                                         
1328                                                         //passed the test so remove you
1329                                                         if (remove) {
1330                                                                 names.insert(m->getSimpleLabel(otuLabel));
1331                                                                 remove=true; break;
1332                                                         }else {
1333                                                                 //wroteSomething = true;
1334                                                                 //out << name << '\t' << tax << endl;
1335                                                         }
1336                                                 }else {
1337                                                         //wroteSomething = true;
1338                                                         //out << name << '\t' << tax << endl;
1339                                                 }
1340                                         }
1341                                 }
1342                                 
1343                         }
1344                         
1345                         if (!remove) {  wroteSomething = true; out << otuLabel << '\t' << numReps << '\t' << tax << endl; }
1346                         
1347                 }
1348                 in.close();
1349                 out.close();
1350                 
1351                 if (names.size() == 0) { m->mothurOut("Your constaxonomy file contains OTUs only from " + taxons + "."); m->mothurOutEndLine();  }
1352                 outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName);
1353         
1354                 return 0;
1355         
1356         }
1357         catch(exception& e) {
1358                 m->errorOut(e, "RemoveLineageCommand", "readConsTax");
1359                 exit(1);
1360         }
1361 }
1362 /**************************************************************************************************/
1363 vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
1364         try {
1365                 
1366                 vector< map<string, float> > t;
1367                 string taxon = "";
1368                 int taxLength = tax.length();
1369                 for(int i=0;i<taxLength;i++){
1370                         if(tax[i] == ';'){
1371                 
1372                                 int openParen = taxon.find_last_of('(');
1373                                 int closeParen = taxon.find_last_of(')');
1374                                 
1375                                 string newtaxon, confidence;
1376                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
1377                     string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
1378                     if (m->isNumeric1(confidenceScore)) {  //its a confidence
1379                         newtaxon = taxon.substr(0, openParen); //rip off confidence
1380                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
1381                     }else { //its part of the taxon
1382                         newtaxon = taxon;
1383                         confidence = "0";
1384                     }
1385                                 }else{
1386                                         newtaxon = taxon;
1387                                         confidence = "0";
1388                                 } 
1389                                 float con = 0;
1390                                 convert(confidence, con);
1391                                 
1392                                 map<string, float> temp;
1393                                 temp[newtaxon] = con;
1394                                 t.push_back(temp);
1395                                 taxon = "";
1396                         }
1397                         else{
1398                                 taxon += tax[i];
1399                         }
1400                 }
1401                 
1402                 return t;
1403         }
1404         catch(exception& e) {
1405                 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
1406                 exit(1);
1407         }
1408 }
1409 //**********************************************************************************************************************
1410 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
1411 int RemoveLineageCommand::readAlign(){
1412         try {
1413                 string thisOutputDir = outputDir;
1414                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
1415         map<string, string> variables; 
1416                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
1417         variables["[extension]"] = m->getExtension(alignfile);
1418                 string outputFileName = getOutputFileName("alignreport", variables);
1419                 
1420                 ofstream out;
1421                 m->openOutputFile(outputFileName, out);
1422
1423                 ifstream in;
1424                 m->openInputFile(alignfile, in);
1425                 string name, junk;
1426                 
1427                 bool wroteSomething = false;
1428                 
1429                 //read column headers
1430                 for (int i = 0; i < 16; i++) {  
1431                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1432                         else                    {       break;                  }
1433                 }
1434                 out << endl;
1435                 
1436                 while(!in.eof()){
1437                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1438                         
1439                         in >> name;                             //read from first column
1440                         
1441                         //if this name is in the accnos file
1442                         if (names.count(name) == 0) {
1443                                 wroteSomething = true;
1444                                 
1445                                 out << name << '\t';
1446                                 
1447                                 //read rest
1448                                 for (int i = 0; i < 15; i++) {  
1449                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1450                                         else                    {       break;                  }
1451                                 }
1452                                 out << endl;
1453                                 
1454                         }else {//still read just don't do anything with it
1455                                 
1456                                 //read rest
1457                                 for (int i = 0; i < 15; i++) {  
1458                                         if (!in.eof())  {       in >> junk;             }
1459                                         else                    {       break;                  }
1460                                 }
1461                         }
1462                         
1463                         m->gobble(in);
1464                 }
1465                 in.close();
1466                 out.close();
1467                 
1468                 if (wroteSomething == false) {  m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
1469                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
1470                 
1471                 return 0;
1472                 
1473         }
1474         catch(exception& e) {
1475                 m->errorOut(e, "RemoveLineageCommand", "readAlign");
1476                 exit(1);
1477         }
1478 }
1479 //**********************************************************************************************************************
1480