]> git.donarmstrong.com Git - mothur.git/blob - removelineagecommand.cpp
added modify names parameter to set.dir
[mothur.git] / removelineagecommand.cpp
1 /*
2  *  removelineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14 #include "inputdata.h"
15
16 //**********************************************************************************************************************
17 vector<string> RemoveLineageCommand::setParameters(){   
18         try {
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
20         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
21         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
24         CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT", "none","shared",false,false, true); parameters.push_back(pshared);
25                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","taxonomy",false,false, true); parameters.push_back(ptaxonomy);
26         CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "tax", "FNGLT", "none","constaxonomy",false,false, true); parameters.push_back(pconstaxonomy);
27                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
28         CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
29                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "","",false,true,true); parameters.push_back(ptaxon);
30                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
31                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
32                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
33                 
34                 vector<string> myArray;
35                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
36                 return myArray;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "RemoveLineageCommand", "setParameters");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 string RemoveLineageCommand::getHelpString(){   
45         try {
46                 string helpString = "";
47                 helpString += "The remove.lineage command reads a taxonomy or constaxonomy file and any of the following file types: fasta, name, group, count, list, shared or alignreport file. The constaxonomy can only be used with a shared or list file.\n";
48                 helpString += "It outputs a file containing only the sequences or OTUS from the taxonomy file that are not from the taxon you requested to be removed.\n";
49                 helpString += "The remove.lineage command parameters are taxon, fasta, name, group, count, list, shared, taxonomy, alignreport, label and dups.  You must provide taxonomy or constaxonomy unless you have a valid current taxonomy file.\n";
50                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
51                 helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
52                 helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
53                 helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n";
54         helpString += "The label parameter is used to analyze specific labels in your input. \n";
55                 helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
56                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
57                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
58                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
59                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
60                 return helpString;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "RemoveLineageCommand", "getHelpString");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 string RemoveLineageCommand::getOutputPattern(string type) {
69     try {
70         string pattern = "";
71         
72         if (type == "fasta")                {   pattern = "[filename],pick,[extension]";    }
73         else if (type == "taxonomy")        {   pattern = "[filename],pick,[extension]";    }
74         else if (type == "constaxonomy")    {   pattern = "[filename],pick,[extension]";    }
75         else if (type == "name")            {   pattern = "[filename],pick,[extension]";    }
76         else if (type == "group")           {   pattern = "[filename],pick,[extension]";    }
77         else if (type == "count")           {   pattern = "[filename],pick,[extension]";    }
78         else if (type == "list")            {   pattern = "[filename],pick,[extension]-[filename],[distance],pick,[extension]";    }
79         else if (type == "shared")          {   pattern = "[filename],[distance],pick,[extension]";    }
80         else if (type == "alignreport")     {   pattern = "[filename],pick.align.report";    }
81         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
82         
83         return pattern;
84     }
85     catch(exception& e) {
86         m->errorOut(e, "RemoveLineageCommand", "getOutputPattern");
87         exit(1);
88     }
89 }
90
91 //**********************************************************************************************************************
92 RemoveLineageCommand::RemoveLineageCommand(){   
93         try {
94                 abort = true; calledHelp = true; 
95                 setParameters();
96                 vector<string> tempOutNames;
97                 outputTypes["fasta"] = tempOutNames;
98                 outputTypes["taxonomy"] = tempOutNames;
99                 outputTypes["name"] = tempOutNames;
100                 outputTypes["group"] = tempOutNames;
101                 outputTypes["alignreport"] = tempOutNames;
102                 outputTypes["list"] = tempOutNames;
103         outputTypes["count"] = tempOutNames;
104         outputTypes["constaxonomy"] = tempOutNames;
105         outputTypes["shared"] = tempOutNames;
106
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
110                 exit(1);
111         }
112 }
113 //**********************************************************************************************************************
114 RemoveLineageCommand::RemoveLineageCommand(string option)  {
115         try {
116                 abort = false; calledHelp = false;   
117                                 
118                 //allow user to run help
119                 if(option == "help") { help(); abort = true; calledHelp = true; }
120                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
121                 
122                 else {
123                         vector<string> myArray = setParameters();       
124                         
125                         OptionParser parser(option);
126                         map<string,string> parameters = parser.getParameters();
127                         
128                         ValidParameters validParameter;
129                         map<string,string>::iterator it;
130                         
131                         //check to make sure all parameters are valid for command
132                         for (it = parameters.begin(); it != parameters.end(); it++) { 
133                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
134                         }
135                         
136                         //initialize outputTypes
137                         vector<string> tempOutNames;
138                         outputTypes["fasta"] = tempOutNames;
139                         outputTypes["taxonomy"] = tempOutNames;
140                         outputTypes["name"] = tempOutNames;
141                         outputTypes["group"] = tempOutNames;
142                         outputTypes["alignreport"] = tempOutNames;
143                         outputTypes["list"] = tempOutNames;
144             outputTypes["count"] = tempOutNames;
145             outputTypes["constaxonomy"] = tempOutNames;
146             outputTypes["shared"] = tempOutNames;
147
148                         
149                         //if the user changes the output directory command factory will send this info to us in the output parameter 
150                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("alignreport");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
163                                 }
164                                 
165                                 it = parameters.find("fasta");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
171                                 }
172                                 
173                                 it = parameters.find("list");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                                 it = parameters.find("group");
190                                 //user has given a template file
191                                 if(it != parameters.end()){ 
192                                         path = m->hasPath(it->second);
193                                         //if the user has not given a path then, add inputdir. else leave path alone.
194                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
195                                 }
196                                 
197                                 it = parameters.find("taxonomy");
198                                 //user has given a template file
199                                 if(it != parameters.end()){ 
200                                         path = m->hasPath(it->second);
201                                         //if the user has not given a path then, add inputdir. else leave path alone.
202                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
203                                 }
204                 
205                 it = parameters.find("count");
206                                 //user has given a template file
207                                 if(it != parameters.end()){ 
208                                         path = m->hasPath(it->second);
209                                         //if the user has not given a path then, add inputdir. else leave path alone.
210                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
211                                 }
212                 
213                 it = parameters.find("constaxonomy");
214                                 //user has given a template file
215                                 if(it != parameters.end()){
216                                         path = m->hasPath(it->second);
217                                         //if the user has not given a path then, add inputdir. else leave path alone.
218                                         if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
219                                 }
220                 
221                 it = parameters.find("shared");
222                                 //user has given a template file
223                                 if(it != parameters.end()){
224                                         path = m->hasPath(it->second);
225                                         //if the user has not given a path then, add inputdir. else leave path alone.
226                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
227                                 }
228                         }
229
230                         
231                         //check for required parameters                 
232                         fastafile = validParameter.validFile(parameters, "fasta", true);
233                         if (fastafile == "not open") { fastafile = ""; abort = true; }
234                         else if (fastafile == "not found") {  fastafile = "";  }        
235                         else { m->setFastaFile(fastafile); }
236                         
237                         namefile = validParameter.validFile(parameters, "name", true);
238                         if (namefile == "not open") { namefile = ""; abort = true; }
239                         else if (namefile == "not found") {  namefile = "";  }  
240                         else { m->setNameFile(namefile); }
241                         
242                         groupfile = validParameter.validFile(parameters, "group", true);
243                         if (groupfile == "not open") { abort = true; }
244                         else if (groupfile == "not found") {  groupfile = "";  }        
245                         else { m->setGroupFile(groupfile); }
246                         
247                         alignfile = validParameter.validFile(parameters, "alignreport", true);
248                         if (alignfile == "not open") { abort = true; }
249                         else if (alignfile == "not found") {  alignfile = "";  }
250                         
251                         listfile = validParameter.validFile(parameters, "list", true);
252                         if (listfile == "not open") { abort = true; }
253                         else if (listfile == "not found") {  listfile = "";  }
254                         else { m->setListFile(listfile); }
255                         
256                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
257                         if (taxfile == "not open") { taxfile = ""; abort = true; }
258                         else if (taxfile == "not found") {  taxfile = "";               }
259                         else { m->setTaxonomyFile(taxfile); }
260             
261             sharedfile = validParameter.validFile(parameters, "shared", true);
262                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
263                         else if (sharedfile == "not found") {  sharedfile = "";         }
264                         else { m->setSharedFile(sharedfile); }
265             
266             
267             constaxonomy = validParameter.validFile(parameters, "constaxonomy", true);
268                         if (constaxonomy == "not open") { constaxonomy = ""; abort = true; }
269                         else if (constaxonomy == "not found") {  constaxonomy = "";             }
270             
271             if ((constaxonomy == "") && (taxfile == "")) {
272                 taxfile = m->getTaxonomyFile();
273                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
274                 else {
275                     m->mothurOut("You have no current taxonomy file and did not provide a constaxonomy file. The taxonomy or constaxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
276                         }
277
278                         
279             countfile = validParameter.validFile(parameters, "count", true);
280             if (countfile == "not open") { countfile = ""; abort = true; }
281             else if (countfile == "not found") { countfile = "";  }     
282             else { m->setCountTableFile(countfile); }
283             
284             if ((namefile != "") && (countfile != "")) {
285                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
286             }
287             
288             if ((groupfile != "") && (countfile != "")) {
289                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
290             }
291             
292                         string usedDups = "true";
293                         string temp = validParameter.validFile(parameters, "dups", false);      
294                         if (temp == "not found") { 
295                                 if (namefile != "") {  temp = "true";                                   }
296                                 else                            {  temp = "false"; usedDups = "";       }
297                         }
298                         dups = m->isTrue(temp);
299                         
300                         taxons = validParameter.validFile(parameters, "taxon", false);  
301                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
302                         else { 
303                                 //rip off quotes
304                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
305                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
306                         }
307                         m->splitAtChar(taxons, listOfTaxons, '-');
308                         
309                         if ((fastafile == "") && (constaxonomy == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy, constaxonomy, shared or listfile."); m->mothurOutEndLine(); abort = true; }
310             
311             if ((constaxonomy != "") && ((fastafile != "") || (namefile != "") || (groupfile != "") || (alignfile != "") || (taxfile != "") || (countfile != ""))) {
312                 m->mothurOut("[ERROR]: can only use constaxonomy file with a list or shared file, aborting.\n");  abort = true;
313             }
314             
315             if ((constaxonomy != "") && (taxfile != "")) {
316                 m->mothurOut("[ERROR]: Choose only one: taxonomy or constaxonomy, aborting.\n"); abort = true;
317             }
318             
319             if ((sharedfile != "") && (taxfile != "")) {
320                 m->mothurOut("[ERROR]: sharedfile can only be used with constaxonomy file, aborting.\n"); abort = true;
321             }
322             
323             if ((sharedfile != "") || (listfile != "")) {
324                 label = validParameter.validFile(parameters, "label", false);
325                 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
326             }
327
328                 
329                         if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
330                         
331                         if (countfile == "") {
332                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
333                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
334                     parser.getNameFile(files);
335                 }
336             }
337                         
338                 }
339
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
343                 exit(1);
344         }
345 }
346 //**********************************************************************************************************************
347
348 int RemoveLineageCommand::execute(){
349         try {
350                 
351                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
352                 
353                 if (m->control_pressed) { return 0; }
354         
355         if (countfile != "") {
356             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
357                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
358             }
359         }
360                 
361                 //read through the correct file and output lines you want to keep
362                 if (taxfile != "")                      {
363             readTax(); //fills the set of names to get
364             if (namefile != "")                 {               readName();             }
365             if (fastafile != "")                {               readFasta();    }
366             if (countfile != "")                {               readCount();    }
367             if (groupfile != "")                {               readGroup();    }
368             if (alignfile != "")                {               readAlign();    }
369             if (listfile != "")                 {               readList();             }
370             
371         }else {
372             readConsTax();
373             if (listfile != "")                 {               readConsList();         }
374             if (sharedfile != "")               {               readShared();           }
375         }
376                 
377                 
378                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
379                 
380                 if (outputNames.size() != 0) {
381                         m->mothurOutEndLine();
382                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
383                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
384                         m->mothurOutEndLine();
385                         
386                         //set fasta file as new current fastafile
387                         string current = "";
388                         itTypes = outputTypes.find("fasta");
389                         if (itTypes != outputTypes.end()) {
390                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
391                         }
392                         
393                         itTypes = outputTypes.find("name");
394                         if (itTypes != outputTypes.end()) {
395                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
396                         }
397                         
398                         itTypes = outputTypes.find("group");
399                         if (itTypes != outputTypes.end()) {
400                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
401                         }
402                         
403                         itTypes = outputTypes.find("list");
404                         if (itTypes != outputTypes.end()) {
405                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
406                         }
407                         
408                         itTypes = outputTypes.find("taxonomy");
409                         if (itTypes != outputTypes.end()) {
410                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
411                         }
412             
413             itTypes = outputTypes.find("count");
414                         if (itTypes != outputTypes.end()) {
415                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
416                         }
417                 }
418                 
419                 return 0;               
420         }
421
422         catch(exception& e) {
423                 m->errorOut(e, "RemoveLineageCommand", "execute");
424                 exit(1);
425         }
426 }
427
428 //**********************************************************************************************************************
429 int RemoveLineageCommand::readFasta(){
430         try {
431                 string thisOutputDir = outputDir;
432                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
433                 map<string, string> variables; 
434         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
435         variables["[extension]"] = m->getExtension(fastafile);
436                 string outputFileName = getOutputFileName("fasta", variables);  
437                 ofstream out;
438                 m->openOutputFile(outputFileName, out);
439                 
440                 ifstream in;
441                 m->openInputFile(fastafile, in);
442                 string name;
443                 
444                 bool wroteSomething = false;
445                 
446                 while(!in.eof()){
447                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
448                         
449                         Sequence currSeq(in);
450                         name = currSeq.getName();
451                         
452                         if (name != "") {
453                                 //if this name is in the accnos file
454                                 if (names.count(name) == 0) {
455                                         wroteSomething = true;
456                                         
457                                         currSeq.printSequence(out);
458                                 }
459                         }
460                         m->gobble(in);
461                 }
462                 in.close();     
463                 out.close();
464                 
465                 if (wroteSomething == false) {  m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
466                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); 
467                 
468                 return 0;
469                 
470         }
471         catch(exception& e) {
472                 m->errorOut(e, "RemoveLineageCommand", "readFasta");
473                 exit(1);
474         }
475 }
476 //**********************************************************************************************************************
477 int RemoveLineageCommand::readList(){
478         try {
479                 string thisOutputDir = outputDir;
480                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
481                 map<string, string> variables; 
482         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
483         variables["[extension]"] = m->getExtension(listfile);
484                 string outputFileName = getOutputFileName("list", variables);   
485                 ofstream out;
486                 m->openOutputFile(outputFileName, out);
487                 
488                 ifstream in;
489                 m->openInputFile(listfile, in);
490                 
491                 bool wroteSomething = false;
492                 
493                 while(!in.eof()){
494                         //read in list vector
495                         ListVector list(in);
496                         
497                         //make a new list vector
498                         ListVector newList;
499                         newList.setLabel(list.getLabel());
500                         
501                         //for each bin
502                         for (int i = 0; i < list.getNumBins(); i++) {
503                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
504                         
505                                 //parse out names that are in accnos file
506                                 string binnames = list.get(i);
507                 vector<string> bnames;
508                 m->splitAtComma(binnames, bnames);
509                                 
510                                 string newNames = "";
511                 for (int j = 0; j < bnames.size(); j++) {
512                                         string name = bnames[j];
513                                         //if that name is in the .accnos file, add it
514                                         if (names.count(name) == 0) {  newNames += name + ",";  }
515                                 }
516
517                                 //if there are names in this bin add to new list
518                                 if (newNames != "") {  
519                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
520                                         newList.push_back(newNames);    
521                                 }
522                         }
523                                 
524                         //print new listvector
525                         if (newList.getNumBins() != 0) {
526                                 wroteSomething = true;
527                                 newList.print(out);
528                         }
529                         
530                         m->gobble(in);
531                 }
532                 in.close();     
533                 out.close();
534                 
535                 if (wroteSomething == false) {  m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
536                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); 
537                                 
538                 return 0;
539
540         }
541         catch(exception& e) {
542                 m->errorOut(e, "RemoveLineageCommand", "readList");
543                 exit(1);
544         }
545 }
546 //**********************************************************************************************************************
547 int RemoveLineageCommand::readName(){
548         try {
549                 string thisOutputDir = outputDir;
550                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
551                 map<string, string> variables; 
552                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
553         variables["[extension]"] = m->getExtension(namefile);
554                 string outputFileName = getOutputFileName("name", variables);
555                 ofstream out;
556                 m->openOutputFile(outputFileName, out);
557
558                 ifstream in;
559                 m->openInputFile(namefile, in);
560                 string name, firstCol, secondCol;
561                 
562                 bool wroteSomething = false;
563                 
564                 while(!in.eof()){
565                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
566
567                         in >> firstCol;                         
568                         in >> secondCol;                        
569
570                         vector<string> parsedNames;
571                         m->splitAtComma(secondCol, parsedNames);
572                         
573                         vector<string> validSecond;  validSecond.clear();
574                         for (int i = 0; i < parsedNames.size(); i++) {
575                                 if (names.count(parsedNames[i]) == 0) {
576                                         validSecond.push_back(parsedNames[i]);
577                                 }
578                         }
579                         
580                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
581                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
582                         }else {
583                                         //if the name in the first column is in the set then print it and any other names in second column also in set
584                                 if (names.count(firstCol) == 0) {
585                                         
586                                         wroteSomething = true;
587                                         
588                                         out << firstCol << '\t';
589                                         
590                                         //you know you have at least one valid second since first column is valid
591                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
592                                         out << validSecond[validSecond.size()-1] << endl;
593                                         
594                                         //make first name in set you come to first column and then add the remaining names to second column
595                                 }else {
596                                         
597                                         //you want part of this row
598                                         if (validSecond.size() != 0) {
599                                                 
600                                                 wroteSomething = true;
601                                                 
602                                                 out << validSecond[0] << '\t';
603                                                 
604                                                 //you know you have at least one valid second since first column is valid
605                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
606                                                 out << validSecond[validSecond.size()-1] << endl;
607                                         }
608                                 }
609                         }
610                         m->gobble(in);
611                 }
612                 in.close();
613                 out.close();
614
615                 if (wroteSomething == false) {  m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
616                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
617                                 
618                 return 0;
619         }
620         catch(exception& e) {
621                 m->errorOut(e, "RemoveLineageCommand", "readName");
622                 exit(1);
623         }
624 }
625 //**********************************************************************************************************************
626 int RemoveLineageCommand::readCount(){
627         try {
628                 string thisOutputDir = outputDir;
629                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
630                 map<string, string> variables; 
631                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
632         variables["[extension]"] = m->getExtension(countfile);
633                 string outputFileName = getOutputFileName("count", variables);
634
635                 
636                 ofstream out;
637                 m->openOutputFile(outputFileName, out);
638                 
639                 ifstream in;
640                 m->openInputFile(countfile, in);
641                 
642                 bool wroteSomething = false;
643                 
644         string headers = m->getline(in); m->gobble(in);
645         out << headers << endl;
646         
647         string name, rest; int thisTotal;
648         while (!in.eof()) {
649             
650             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
651             
652             in >> name; m->gobble(in); 
653             in >> thisTotal; m->gobble(in);
654             rest = m->getline(in); m->gobble(in);
655             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
656             
657             if (names.count(name) == 0) {
658                 out << name << '\t' << thisTotal << '\t' << rest << endl;
659                 wroteSomething = true;
660             }
661         }
662         in.close();
663                 out.close();
664         
665         //check for groups that have been eliminated
666         CountTable ct;
667         if (ct.testGroups(outputFileName)) {
668             ct.readTable(outputFileName, true);
669             ct.printTable(outputFileName);
670         }
671                 
672                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
673                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
674         
675                 return 0;
676         }
677         catch(exception& e) {
678                 m->errorOut(e, "RemoveLineageCommand", "readCount");
679                 exit(1);
680         }
681 }
682 //**********************************************************************************************************************
683 int RemoveLineageCommand::readConsList(){
684         try {
685                 getListVector();
686         
687         if (m->control_pressed) { delete list; return 0;}
688         
689         ListVector newList;
690         newList.setLabel(list->getLabel());
691         int removedCount = 0;
692         bool wroteSomething = false;
693         string snumBins = toString(list->getNumBins());
694         
695         for (int i = 0; i < list->getNumBins(); i++) {
696             
697             if (m->control_pressed) { delete list; return 0;}
698             
699             //create a label for this otu
700             string otuLabel = "Otu";
701             string sbinNumber = toString(i+1);
702             if (sbinNumber.length() < snumBins.length()) {
703                 int diff = snumBins.length() - sbinNumber.length();
704                 for (int h = 0; h < diff; h++) { otuLabel += "0"; }
705             }
706             otuLabel += sbinNumber;
707             
708             if (names.count(otuLabel) == 0) {
709                 newList.push_back(list->get(i));
710             }else { removedCount++; }
711         }
712         
713         string thisOutputDir = outputDir;
714                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
715         map<string, string> variables;
716                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
717         variables["[extension]"] = m->getExtension(listfile);
718         variables["[distance]"] = list->getLabel();
719                 string outputFileName = getOutputFileName("list", variables);
720                 ofstream out;
721                 m->openOutputFile(outputFileName, out);
722         
723                 delete list;
724         //print new listvector
725         if (newList.getNumBins() != 0) {
726             wroteSomething = true;
727             newList.print(out);
728         }
729                 out.close();
730                 
731                 if (wroteSomething == false) { m->mothurOut("Your file only contains OTUs from " + taxons + "."); m->mothurOutEndLine();  }
732                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
733                 
734                 m->mothurOut("Removed " + toString(removedCount) + " OTUs from your list file."); m->mothurOutEndLine();
735         
736                 return 0;
737         
738         }
739         catch(exception& e) {
740                 m->errorOut(e, "RemoveLineageCommand", "readConsList");
741                 exit(1);
742         }
743 }
744 //**********************************************************************************************************************
745 int RemoveLineageCommand::getListVector(){
746         try {
747                 InputData input(listfile, "list");
748                 list = input.getListVector();
749                 string lastLabel = list->getLabel();
750                 
751                 if (label == "") { label = lastLabel;  return 0; }
752                 
753                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
754                 set<string> labels; labels.insert(label);
755                 set<string> processedLabels;
756                 set<string> userLabels = labels;
757                 
758                 //as long as you are not at the end of the file or done wih the lines you want
759                 while((list != NULL) && (userLabels.size() != 0)) {
760                         if (m->control_pressed) {  return 0;  }
761                         
762                         if(labels.count(list->getLabel()) == 1){
763                                 processedLabels.insert(list->getLabel());
764                                 userLabels.erase(list->getLabel());
765                                 break;
766                         }
767                         
768                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
769                                 string saveLabel = list->getLabel();
770                                 
771                                 delete list;
772                                 list = input.getListVector(lastLabel);
773                                 
774                                 processedLabels.insert(list->getLabel());
775                                 userLabels.erase(list->getLabel());
776                                 
777                                 //restore real lastlabel to save below
778                                 list->setLabel(saveLabel);
779                                 break;
780                         }
781                         
782                         lastLabel = list->getLabel();
783                         
784                         //get next line to process
785                         //prevent memory leak
786                         delete list;
787                         list = input.getListVector();
788                 }
789                 
790                 
791                 if (m->control_pressed) {  return 0;  }
792                 
793                 //output error messages about any remaining user labels
794                 set<string>::iterator it;
795                 bool needToRun = false;
796                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
797                         m->mothurOut("Your file does not include the label " + *it);
798                         if (processedLabels.count(lastLabel) != 1) {
799                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
800                                 needToRun = true;
801                         }else {
802                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
803                         }
804                 }
805                 
806                 //run last label if you need to
807                 if (needToRun == true)  {
808                         delete list;
809                         list = input.getListVector(lastLabel);
810                 }
811                 
812                 return 0;
813         }
814         catch(exception& e) {
815                 m->errorOut(e, "RemoveLineageCommand", "getListVector");
816                 exit(1);
817         }
818 }
819
820 //**********************************************************************************************************************
821 int RemoveLineageCommand::readShared(){
822         try {
823         
824         getShared();
825         
826         if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } return 0; }
827         
828         vector<string> newLabels;
829         
830         //create new "filtered" lookup
831         vector<SharedRAbundVector*> newLookup;
832         for (int i = 0; i < lookup.size(); i++) {
833             SharedRAbundVector* temp = new SharedRAbundVector();
834                         temp->setLabel(lookup[i]->getLabel());
835                         temp->setGroup(lookup[i]->getGroup());
836                         newLookup.push_back(temp);
837         }
838         
839         bool wroteSomething = false;
840         int numRemoved = 0;
841         for (int i = 0; i < lookup[0]->getNumBins(); i++) {
842             
843             if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; } return 0; }
844             
845             //is this otu on the list
846             if (names.count(m->currentBinLabels[i]) == 0) {
847                 wroteSomething = true;
848                 newLabels.push_back(m->currentBinLabels[i]);
849                 for (int j = 0; j < newLookup.size(); j++) { //add this OTU to the new lookup
850                     newLookup[j]->push_back(lookup[j]->getAbundance(i), lookup[j]->getGroup());
851                 }
852             }else { numRemoved++; }
853         }
854         
855         string thisOutputDir = outputDir;
856                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
857         map<string, string> variables;
858                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
859         variables["[extension]"] = m->getExtension(sharedfile);
860         variables["[distance]"] = lookup[0]->getLabel();
861                 string outputFileName = getOutputFileName("shared", variables);
862         ofstream out;
863                 m->openOutputFile(outputFileName, out);
864                 outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
865         
866                 for (int j = 0; j < lookup.size(); j++) { delete lookup[j]; }
867         
868         m->currentBinLabels = newLabels;
869         
870                 newLookup[0]->printHeaders(out);
871                 
872                 for (int i = 0; i < newLookup.size(); i++) {
873                         out << newLookup[i]->getLabel() << '\t' << newLookup[i]->getGroup() << '\t';
874                         newLookup[i]->print(out);
875                 }
876                 out.close();
877         
878         for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; }
879         
880         if (wroteSomething == false) { m->mothurOut("Your file only contains OTUs from " + taxons + "."); m->mothurOutEndLine();  }
881         
882                 m->mothurOut("Removed " + toString(numRemoved) + " OTUs from your shared file."); m->mothurOutEndLine();
883         
884         return 0;
885     }
886         catch(exception& e) {
887                 m->errorOut(e, "RemoveLineageCommand", "readShared");
888                 exit(1);
889         }
890 }
891 //**********************************************************************************************************************
892 int RemoveLineageCommand::getShared(){
893         try {
894                 InputData input(sharedfile, "sharedfile");
895                 lookup = input.getSharedRAbundVectors();
896                 string lastLabel = lookup[0]->getLabel();
897                 
898                 if (label == "") { label = lastLabel;  return 0; }
899                 
900                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
901                 set<string> labels; labels.insert(label);
902                 set<string> processedLabels;
903                 set<string> userLabels = labels;
904                 
905                 //as long as you are not at the end of the file or done wih the lines you want
906                 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
907                         if (m->control_pressed) {   return 0;  }
908                         
909                         if(labels.count(lookup[0]->getLabel()) == 1){
910                                 processedLabels.insert(lookup[0]->getLabel());
911                                 userLabels.erase(lookup[0]->getLabel());
912                                 break;
913                         }
914                         
915                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
916                                 string saveLabel = lookup[0]->getLabel();
917                                 
918                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
919                                 lookup = input.getSharedRAbundVectors(lastLabel);
920                                 
921                                 processedLabels.insert(lookup[0]->getLabel());
922                                 userLabels.erase(lookup[0]->getLabel());
923                                 
924                                 //restore real lastlabel to save below
925                                 lookup[0]->setLabel(saveLabel);
926                                 break;
927                         }
928                         
929                         lastLabel = lookup[0]->getLabel();
930                         
931                         //get next line to process
932                         //prevent memory leak
933                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
934                         lookup = input.getSharedRAbundVectors();
935                 }
936                 
937                 
938                 if (m->control_pressed) {  return 0;  }
939                 
940                 //output error messages about any remaining user labels
941                 set<string>::iterator it;
942                 bool needToRun = false;
943                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
944                         m->mothurOut("Your file does not include the label " + *it);
945                         if (processedLabels.count(lastLabel) != 1) {
946                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
947                                 needToRun = true;
948                         }else {
949                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
950                         }
951                 }
952                 
953                 //run last label if you need to
954                 if (needToRun == true)  {
955                         for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {     delete lookup[i];       } }
956                         lookup = input.getSharedRAbundVectors(lastLabel);
957                 }
958                 
959                 return 0;
960         }
961         catch(exception& e) {
962                 m->errorOut(e, "RemoveLineageCommand", "getShared");
963                 exit(1);
964         }
965 }
966
967
968 //**********************************************************************************************************************
969 int RemoveLineageCommand::readGroup(){
970         try {
971                 string thisOutputDir = outputDir;
972                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
973                 map<string, string> variables; 
974                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
975         variables["[extension]"] = m->getExtension(groupfile);
976                 string outputFileName = getOutputFileName("group", variables);
977         
978                 ofstream out;
979                 m->openOutputFile(outputFileName, out);
980
981                 ifstream in;
982                 m->openInputFile(groupfile, in);
983                 string name, group;
984                 
985                 bool wroteSomething = false;
986                 
987                 while(!in.eof()){
988                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
989                         
990                         in >> name;                             //read from first column
991                         in >> group;                    //read from second column
992                         
993                         //if this name is in the accnos file
994                         if (names.count(name) == 0) {
995                                 wroteSomething = true;
996                                 out << name << '\t' << group << endl;
997                         }
998                                         
999                         m->gobble(in);
1000                 }
1001                 in.close();
1002                 out.close();
1003                 
1004                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
1005                 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
1006                 
1007                 return 0;
1008         }
1009         catch(exception& e) {
1010                 m->errorOut(e, "RemoveLineageCommand", "readGroup");
1011                 exit(1);
1012         }
1013 }
1014 //**********************************************************************************************************************
1015 int RemoveLineageCommand::readTax(){
1016         try {
1017                 string thisOutputDir = outputDir;
1018                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
1019                 map<string, string> variables; 
1020                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
1021         variables["[extension]"] = m->getExtension(taxfile);
1022                 string outputFileName = getOutputFileName("taxonomy", variables);
1023                 ofstream out;
1024                 m->openOutputFile(outputFileName, out);
1025                 
1026                 ifstream in;
1027                 m->openInputFile(taxfile, in);
1028                 string name, tax;
1029                 
1030                 bool wroteSomething = false;
1031                 
1032                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1033                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1034                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1035                 
1036                 for (int i = 0; i < listOfTaxons.size(); i++) {
1037                         noConfidenceTaxons[i] = listOfTaxons[i];
1038                         int hasConPos = listOfTaxons[i].find_first_of('(');
1039                         if (hasConPos != string::npos) {  
1040                                 taxonsHasConfidence[i] = true; 
1041                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
1042                                 noConfidenceTaxons[i] = listOfTaxons[i];
1043                                 m->removeConfidences(noConfidenceTaxons[i]);
1044                         }
1045                 }
1046                 
1047                 
1048                 while(!in.eof()){
1049
1050                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1051
1052                         in >> name;                             //read from first column
1053                         in >> tax;                      //read from second column
1054                         
1055                         bool remove = false;
1056                         
1057             string noQuotesTax = m->removeQuotes(tax);
1058             
1059                         for (int j = 0; j < listOfTaxons.size(); j++) {
1060                                 string newtax = noQuotesTax;
1061                                 
1062                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1063                                 if (!taxonsHasConfidence[j]) {
1064                                         
1065                                         int hasConfidences = noQuotesTax.find_first_of('(');
1066                                         if (hasConfidences != string::npos) { 
1067                                                 newtax = noQuotesTax;
1068                                                 m->removeConfidences(newtax);
1069                                         }
1070                                         
1071                                         int pos = newtax.find(noConfidenceTaxons[j]);
1072                                         
1073                                         if (pos == string::npos) { 
1074                                                 //wroteSomething = true;
1075                                                 //out << name << '\t' << tax << endl;
1076                                         }else{ //this sequence contains the taxon the user wants to remove
1077                                                 names.insert(name);
1078                                                 remove=true; break;
1079                                         }
1080                                         
1081                                 }else{//if taxons has them and you don't them remove taxons
1082                                         int hasConfidences = noQuotesTax.find_first_of('(');
1083                                         if (hasConfidences == string::npos) { 
1084                                                 
1085                                                 int pos = newtax.find(noConfidenceTaxons[j]);
1086                                                 
1087                                                 if (pos == string::npos) { 
1088                                                         //wroteSomething = true;
1089                                                         //out << name << '\t' << tax << endl;
1090                                                 }else{ //this sequence contains the taxon the user wants to remove
1091                                                         names.insert(name);
1092                                                         remove=true; break;
1093                                                 }
1094                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1095                                                 //first remove confidences from both and see if the taxonomy exists
1096                                                 
1097                                                 string noNewTax = noQuotesTax;
1098                                                 int hasConfidences = noQuotesTax.find_first_of('(');
1099                                                 if (hasConfidences != string::npos) { 
1100                                                         noNewTax = noQuotesTax;
1101                                                         m->removeConfidences(noNewTax);
1102                                                 }
1103                                                 
1104                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
1105                                                 
1106                                                 if (pos != string::npos) { //if yes, then are the confidences okay
1107                                                         
1108                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
1109                                                         
1110                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1111                                                         //we want to "line them up", so we will find the the index where the searchstring starts
1112                                                         int index = 0;
1113                                                         for (int i = 0; i < usersTaxon.size(); i++) {
1114                                                                 
1115                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
1116                                                                         index = i;  
1117                                                                         int spot = 0;
1118                                                                         bool goodspot = true;
1119                                                                         //is this really the start, or are we dealing with a taxon of the same name?
1120                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1121                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1122                                                                                 else { spot++; }
1123                                                                         }
1124                                                                         
1125                                                                         if (goodspot) { break; }
1126                                                                 }
1127                                                         }
1128                                                         
1129                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
1130                                                                 
1131                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1132                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1133                                                                                 remove = true;
1134                                                                                 break;
1135                                                                         }
1136                                                                 }else {
1137                                                                         remove = true;
1138                                                                         break;
1139                                                                 }
1140                                                         }
1141                                                         
1142                                                         //passed the test so remove you
1143                                                         if (remove) {
1144                                                                 names.insert(name);
1145                                                                 remove=true; break;
1146                                                         }else {
1147                                                                 //wroteSomething = true;
1148                                                                 //out << name << '\t' << tax << endl;
1149                                                         }
1150                                                 }else {
1151                                                         //wroteSomething = true;
1152                                                         //out << name << '\t' << tax << endl;
1153                                                 }
1154                                         }
1155                                 }
1156                                 
1157                         }
1158                         
1159                         if (!remove) {  wroteSomething = true; out << name << '\t' << tax << endl; }
1160                         m->gobble(in);
1161                 }
1162                 in.close();
1163                 out.close();
1164                 
1165                 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
1166                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
1167                         
1168                 return 0;
1169
1170         }
1171         catch(exception& e) {
1172                 m->errorOut(e, "RemoveLineageCommand", "readTax");
1173                 exit(1);
1174         }
1175 }
1176 //**********************************************************************************************************************
1177 int RemoveLineageCommand::readConsTax(){
1178         try {
1179                 string thisOutputDir = outputDir;
1180                 if (outputDir == "") {  thisOutputDir += m->hasPath(constaxonomy);  }
1181                 map<string, string> variables;
1182                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(constaxonomy));
1183         variables["[extension]"] = m->getExtension(constaxonomy);
1184                 string outputFileName = getOutputFileName("constaxonomy", variables);
1185                 ofstream out;
1186                 m->openOutputFile(outputFileName, out);
1187                 
1188                 ifstream in;
1189                 m->openInputFile(constaxonomy, in);
1190                 string otuLabel, tax;
1191         int numReps;
1192         bool wroteSomething = false;
1193         
1194         //read headers
1195         string headers = m->getline(in);
1196         out << headers << endl;
1197                 
1198                 //bool wroteSomething = false;
1199                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
1200                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
1201                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
1202                 
1203                 for (int i = 0; i < listOfTaxons.size(); i++) {
1204                         noConfidenceTaxons[i] = listOfTaxons[i];
1205                         int hasConPos = listOfTaxons[i].find_first_of('(');
1206                         if (hasConPos != string::npos) {
1207                                 taxonsHasConfidence[i] = true;
1208                                 searchTaxons[i] = getTaxons(listOfTaxons[i]);
1209                                 noConfidenceTaxons[i] = listOfTaxons[i];
1210                                 m->removeConfidences(noConfidenceTaxons[i]);
1211                         }
1212                 }
1213                 
1214         
1215                 while(!in.eof()){
1216             
1217                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
1218             
1219                         in >> otuLabel;                 m->gobble(in);
1220             in >> numReps;          m->gobble(in);
1221                         in >> tax;              m->gobble(in);
1222                         
1223             bool remove = false;
1224                         
1225             string noQuotesTax = m->removeQuotes(tax);
1226             
1227                         for (int j = 0; j < listOfTaxons.size(); j++) {
1228                                 string newtax = noQuotesTax;
1229                                 
1230                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
1231                                 if (!taxonsHasConfidence[j]) {
1232                                         
1233                                         int hasConfidences = noQuotesTax.find_first_of('(');
1234                                         if (hasConfidences != string::npos) {
1235                                                 newtax = noQuotesTax;
1236                                                 m->removeConfidences(newtax);
1237                                         }
1238                                         
1239                                         int pos = newtax.find(noConfidenceTaxons[j]);
1240                                         
1241                                         if (pos == string::npos) {
1242                                                 //wroteSomething = true;
1243                                                 //out << name << '\t' << tax << endl;
1244                                         }else{ //this sequence contains the taxon the user wants to remove
1245                                                 names.insert(otuLabel);
1246                                                 remove=true; break;
1247                                         }
1248                                         
1249                                 }else{//if taxons has them and you don't them remove taxons
1250                                         int hasConfidences = noQuotesTax.find_first_of('(');
1251                                         if (hasConfidences == string::npos) {
1252                                                 
1253                                                 int pos = newtax.find(noConfidenceTaxons[j]);
1254                                                 
1255                                                 if (pos == string::npos) {
1256                                                         //wroteSomething = true;
1257                                                         //out << name << '\t' << tax << endl;
1258                                                 }else{ //this sequence contains the taxon the user wants to remove
1259                                                         names.insert(otuLabel);
1260                                                         remove=true; break;
1261                                                 }
1262                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
1263                                                 //first remove confidences from both and see if the taxonomy exists
1264                                                 
1265                                                 string noNewTax = noQuotesTax;
1266                                                 int hasConfidences = noQuotesTax.find_first_of('(');
1267                                                 if (hasConfidences != string::npos) {
1268                                                         noNewTax = noQuotesTax;
1269                                                         m->removeConfidences(noNewTax);
1270                                                 }
1271                                                 
1272                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
1273                                                 
1274                                                 if (pos != string::npos) { //if yes, then are the confidences okay
1275                                                         
1276                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
1277                                                         
1278                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
1279                                                         //we want to "line them up", so we will find the the index where the searchstring starts
1280                                                         int index = 0;
1281                                                         for (int i = 0; i < usersTaxon.size(); i++) {
1282                                                                 
1283                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) {
1284                                                                         index = i;
1285                                                                         int spot = 0;
1286                                                                         bool goodspot = true;
1287                                                                         //is this really the start, or are we dealing with a taxon of the same name?
1288                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
1289                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
1290                                                                                 else { spot++; }
1291                                                                         }
1292                                                                         
1293                                                                         if (goodspot) { break; }
1294                                                                 }
1295                                                         }
1296                                                         
1297                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
1298                                                                 
1299                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
1300                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
1301                                                                                 remove = true;
1302                                                                                 break;
1303                                                                         }
1304                                                                 }else {
1305                                                                         remove = true;
1306                                                                         break;
1307                                                                 }
1308                                                         }
1309                                                         
1310                                                         //passed the test so remove you
1311                                                         if (remove) {
1312                                                                 names.insert(otuLabel);
1313                                                                 remove=true; break;
1314                                                         }else {
1315                                                                 //wroteSomething = true;
1316                                                                 //out << name << '\t' << tax << endl;
1317                                                         }
1318                                                 }else {
1319                                                         //wroteSomething = true;
1320                                                         //out << name << '\t' << tax << endl;
1321                                                 }
1322                                         }
1323                                 }
1324                                 
1325                         }
1326                         
1327                         if (!remove) {  wroteSomething = true; out << otuLabel << '\t' << numReps << '\t' << tax << endl; }
1328                         
1329                 }
1330                 in.close();
1331                 out.close();
1332                 
1333                 if (names.size() == 0) { m->mothurOut("Your constaxonomy file contains OTUs only from " + taxons + "."); m->mothurOutEndLine();  }
1334                 outputNames.push_back(outputFileName); outputTypes["constaxonomy"].push_back(outputFileName);
1335         
1336                 return 0;
1337         
1338         }
1339         catch(exception& e) {
1340                 m->errorOut(e, "RemoveLineageCommand", "readConsTax");
1341                 exit(1);
1342         }
1343 }
1344 /**************************************************************************************************/
1345 vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
1346         try {
1347                 
1348                 vector< map<string, float> > t;
1349                 string taxon = "";
1350                 int taxLength = tax.length();
1351                 for(int i=0;i<taxLength;i++){
1352                         if(tax[i] == ';'){
1353                 
1354                                 int openParen = taxon.find_last_of('(');
1355                                 int closeParen = taxon.find_last_of(')');
1356                                 
1357                                 string newtaxon, confidence;
1358                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
1359                     string confidenceScore = taxon.substr(openParen+1, (closeParen-(openParen+1)));
1360                     if (m->isNumeric1(confidenceScore)) {  //its a confidence
1361                         newtaxon = taxon.substr(0, openParen); //rip off confidence
1362                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
1363                     }else { //its part of the taxon
1364                         newtaxon = taxon;
1365                         confidence = "0";
1366                     }
1367                                 }else{
1368                                         newtaxon = taxon;
1369                                         confidence = "0";
1370                                 } 
1371                                 float con = 0;
1372                                 convert(confidence, con);
1373                                 
1374                                 map<string, float> temp;
1375                                 temp[newtaxon] = con;
1376                                 t.push_back(temp);
1377                                 taxon = "";
1378                         }
1379                         else{
1380                                 taxon += tax[i];
1381                         }
1382                 }
1383                 
1384                 return t;
1385         }
1386         catch(exception& e) {
1387                 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
1388                 exit(1);
1389         }
1390 }
1391 //**********************************************************************************************************************
1392 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
1393 int RemoveLineageCommand::readAlign(){
1394         try {
1395                 string thisOutputDir = outputDir;
1396                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
1397         map<string, string> variables; 
1398                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
1399         variables["[extension]"] = m->getExtension(alignfile);
1400                 string outputFileName = getOutputFileName("alignreport", variables);
1401                 
1402                 ofstream out;
1403                 m->openOutputFile(outputFileName, out);
1404
1405                 ifstream in;
1406                 m->openInputFile(alignfile, in);
1407                 string name, junk;
1408                 
1409                 bool wroteSomething = false;
1410                 
1411                 //read column headers
1412                 for (int i = 0; i < 16; i++) {  
1413                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1414                         else                    {       break;                  }
1415                 }
1416                 out << endl;
1417                 
1418                 while(!in.eof()){
1419                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
1420                         
1421                         in >> name;                             //read from first column
1422                         
1423                         //if this name is in the accnos file
1424                         if (names.count(name) == 0) {
1425                                 wroteSomething = true;
1426                                 
1427                                 out << name << '\t';
1428                                 
1429                                 //read rest
1430                                 for (int i = 0; i < 15; i++) {  
1431                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
1432                                         else                    {       break;                  }
1433                                 }
1434                                 out << endl;
1435                                 
1436                         }else {//still read just don't do anything with it
1437                                 
1438                                 //read rest
1439                                 for (int i = 0; i < 15; i++) {  
1440                                         if (!in.eof())  {       in >> junk;             }
1441                                         else                    {       break;                  }
1442                                 }
1443                         }
1444                         
1445                         m->gobble(in);
1446                 }
1447                 in.close();
1448                 out.close();
1449                 
1450                 if (wroteSomething == false) {  m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
1451                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
1452                 
1453                 return 0;
1454                 
1455         }
1456         catch(exception& e) {
1457                 m->errorOut(e, "RemoveLineageCommand", "readAlign");
1458                 exit(1);
1459         }
1460 }
1461 //**********************************************************************************************************************
1462