]> git.donarmstrong.com Git - mothur.git/blob - removelineagecommand.cpp
changed random forest's inheritance
[mothur.git] / removelineagecommand.cpp
1 /*
2  *  removelineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
14
15 //**********************************************************************************************************************
16 vector<string> RemoveLineageCommand::setParameters(){   
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
19         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
20         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none",false,false); parameters.push_back(pcount);
21                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none",false,false); parameters.push_back(pgroup);
22                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
23                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(ptaxonomy);
24                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(palignreport);
25                 CommandParameter ptaxon("taxon", "String", "", "", "", "", "",false,true); parameters.push_back(ptaxon);
26                 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pdups);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "RemoveLineageCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string RemoveLineageCommand::getHelpString(){   
41         try {
42                 string helpString = "";
43                 helpString += "The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, count, list or alignreport file.\n";
44                 helpString += "It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n";
45                 helpString += "The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, count, alignreport and dups.  You must provide taxonomy unless you have a valid current taxonomy file.\n";
46                 helpString += "The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n";
47                 helpString += "The taxon parameter allows you to select the taxons you would like to remove, and is required.\n";
48                 helpString += "You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n";
49                 helpString += "If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n";
50                 helpString += "The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n";
51                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n";
52                 helpString += "Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n";
53                 helpString += "Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n";
54                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
55                 return helpString;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "RemoveLineageCommand", "getHelpString");
59                 exit(1);
60         }
61 }
62 //**********************************************************************************************************************
63 string RemoveLineageCommand::getOutputFileNameTag(string type, string inputName=""){    
64         try {
65         string outputFileName = "";
66                 map<string, vector<string> >::iterator it;
67         
68         //is this a type this command creates
69         it = outputTypes.find(type);
70         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
71         else {
72             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
73             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
74             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
75             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
76             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
77             else if (type == "count")       {   outputFileName =  "pick.count_table";   }
78             else if (type == "alignreport")      {   outputFileName =  "pick.align.report";   }
79             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
80         }
81         return outputFileName;
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "RemoveLineageCommand", "getOutputFileNameTag");
85                 exit(1);
86         }
87 }
88 //**********************************************************************************************************************
89 RemoveLineageCommand::RemoveLineageCommand(){   
90         try {
91                 abort = true; calledHelp = true; 
92                 setParameters();
93                 vector<string> tempOutNames;
94                 outputTypes["fasta"] = tempOutNames;
95                 outputTypes["taxonomy"] = tempOutNames;
96                 outputTypes["name"] = tempOutNames;
97                 outputTypes["group"] = tempOutNames;
98                 outputTypes["alignreport"] = tempOutNames;
99                 outputTypes["list"] = tempOutNames;
100         outputTypes["count"] = tempOutNames;
101         }
102         catch(exception& e) {
103                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
104                 exit(1);
105         }
106 }
107 //**********************************************************************************************************************
108 RemoveLineageCommand::RemoveLineageCommand(string option)  {
109         try {
110                 abort = false; calledHelp = false;   
111                                 
112                 //allow user to run help
113                 if(option == "help") { help(); abort = true; calledHelp = true; }
114                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
115                 
116                 else {
117                         vector<string> myArray = setParameters();       
118                         
119                         OptionParser parser(option);
120                         map<string,string> parameters = parser.getParameters();
121                         
122                         ValidParameters validParameter;
123                         map<string,string>::iterator it;
124                         
125                         //check to make sure all parameters are valid for command
126                         for (it = parameters.begin(); it != parameters.end(); it++) { 
127                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
128                         }
129                         
130                         //initialize outputTypes
131                         vector<string> tempOutNames;
132                         outputTypes["fasta"] = tempOutNames;
133                         outputTypes["taxonomy"] = tempOutNames;
134                         outputTypes["name"] = tempOutNames;
135                         outputTypes["group"] = tempOutNames;
136                         outputTypes["alignreport"] = tempOutNames;
137                         outputTypes["list"] = tempOutNames;
138             outputTypes["count"] = tempOutNames;
139                         
140                         //if the user changes the output directory command factory will send this info to us in the output parameter 
141                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
142                         
143                         //if the user changes the input directory command factory will send this info to us in the output parameter 
144                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
145                         if (inputDir == "not found"){   inputDir = "";          }
146                         else {
147                                 string path;
148                                 it = parameters.find("alignreport");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
154                                 }
155                                 
156                                 it = parameters.find("fasta");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
162                                 }
163                                 
164                                 it = parameters.find("list");
165                                 //user has given a template file
166                                 if(it != parameters.end()){ 
167                                         path = m->hasPath(it->second);
168                                         //if the user has not given a path then, add inputdir. else leave path alone.
169                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
170                                 }
171                                 
172                                 it = parameters.find("name");
173                                 //user has given a template file
174                                 if(it != parameters.end()){ 
175                                         path = m->hasPath(it->second);
176                                         //if the user has not given a path then, add inputdir. else leave path alone.
177                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
178                                 }
179                                 
180                                 it = parameters.find("group");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
186                                 }
187                                 
188                                 it = parameters.find("taxonomy");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
194                                 }
195                 
196                 it = parameters.find("count");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
202                                 }
203                         }
204
205                         
206                         //check for required parameters                 
207                         fastafile = validParameter.validFile(parameters, "fasta", true);
208                         if (fastafile == "not open") { fastafile = ""; abort = true; }
209                         else if (fastafile == "not found") {  fastafile = "";  }        
210                         else { m->setFastaFile(fastafile); }
211                         
212                         namefile = validParameter.validFile(parameters, "name", true);
213                         if (namefile == "not open") { namefile = ""; abort = true; }
214                         else if (namefile == "not found") {  namefile = "";  }  
215                         else { m->setNameFile(namefile); }
216                         
217                         groupfile = validParameter.validFile(parameters, "group", true);
218                         if (groupfile == "not open") { abort = true; }
219                         else if (groupfile == "not found") {  groupfile = "";  }        
220                         else { m->setGroupFile(groupfile); }
221                         
222                         alignfile = validParameter.validFile(parameters, "alignreport", true);
223                         if (alignfile == "not open") { abort = true; }
224                         else if (alignfile == "not found") {  alignfile = "";  }
225                         
226                         listfile = validParameter.validFile(parameters, "list", true);
227                         if (listfile == "not open") { abort = true; }
228                         else if (listfile == "not found") {  listfile = "";  }
229                         else { m->setListFile(listfile); }
230                         
231                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
232                         if (taxfile == "not open") { taxfile = ""; abort = true; }
233                         else if (taxfile == "not found") {              
234                                 taxfile = m->getTaxonomyFile(); 
235                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
236                                 else {  m->mothurOut("You have no current taxonomy file and the taxonomy parameter is required."); m->mothurOutEndLine(); abort = true; }
237                         }else { m->setTaxonomyFile(taxfile); }
238                         
239             countfile = validParameter.validFile(parameters, "count", true);
240             if (countfile == "not open") { countfile = ""; abort = true; }
241             else if (countfile == "not found") { countfile = "";  }     
242             else { m->setCountTableFile(countfile); }
243             
244             if ((namefile != "") && (countfile != "")) {
245                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
246             }
247             
248             if ((groupfile != "") && (countfile != "")) {
249                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
250             }
251             
252                         string usedDups = "true";
253                         string temp = validParameter.validFile(parameters, "dups", false);      
254                         if (temp == "not found") { 
255                                 if (namefile != "") {  temp = "true";                                   }
256                                 else                            {  temp = "false"; usedDups = "";       }
257                         }
258                         dups = m->isTrue(temp);
259                         
260                         taxons = validParameter.validFile(parameters, "taxon", false);  
261                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
262                         else { 
263                                 //rip off quotes
264                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
265                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
266                         }
267                         m->splitAtChar(taxons, listOfTaxons, '-');
268                         
269                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (countfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, count, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
270                 
271                         if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
272                         
273                         if (countfile == "") {
274                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
275                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
276                     parser.getNameFile(files);
277                 }
278             }
279                         
280                 }
281
282         }
283         catch(exception& e) {
284                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
285                 exit(1);
286         }
287 }
288 //**********************************************************************************************************************
289
290 int RemoveLineageCommand::execute(){
291         try {
292                 
293                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
294                 
295                 if (m->control_pressed) { return 0; }
296         
297         if (countfile != "") {
298             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
299                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
300             }
301         }
302                 
303                 //read through the correct file and output lines you want to keep
304                 if (taxfile != "")                      {               readTax();              }  //fills the set of names to remove
305                 if (namefile != "")                     {               readName();             }
306                 if (fastafile != "")            {               readFasta();    }
307                 if (groupfile != "")            {               readGroup();    }
308                 if (alignfile != "")            {               readAlign();    }
309                 if (listfile != "")                     {               readList();             }
310         if (countfile != "")            {               readCount();    }
311                 
312                 
313                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);  } return 0; }
314                 
315                 if (outputNames.size() != 0) {
316                         m->mothurOutEndLine();
317                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
318                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
319                         m->mothurOutEndLine();
320                         
321                         //set fasta file as new current fastafile
322                         string current = "";
323                         itTypes = outputTypes.find("fasta");
324                         if (itTypes != outputTypes.end()) {
325                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
326                         }
327                         
328                         itTypes = outputTypes.find("name");
329                         if (itTypes != outputTypes.end()) {
330                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
331                         }
332                         
333                         itTypes = outputTypes.find("group");
334                         if (itTypes != outputTypes.end()) {
335                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
336                         }
337                         
338                         itTypes = outputTypes.find("list");
339                         if (itTypes != outputTypes.end()) {
340                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
341                         }
342                         
343                         itTypes = outputTypes.find("taxonomy");
344                         if (itTypes != outputTypes.end()) {
345                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
346                         }
347             
348             itTypes = outputTypes.find("count");
349                         if (itTypes != outputTypes.end()) {
350                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
351                         }
352                 }
353                 
354                 return 0;               
355         }
356
357         catch(exception& e) {
358                 m->errorOut(e, "RemoveLineageCommand", "execute");
359                 exit(1);
360         }
361 }
362
363 //**********************************************************************************************************************
364 int RemoveLineageCommand::readFasta(){
365         try {
366                 string thisOutputDir = outputDir;
367                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
368                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);         
369                 ofstream out;
370                 m->openOutputFile(outputFileName, out);
371                 
372                 ifstream in;
373                 m->openInputFile(fastafile, in);
374                 string name;
375                 
376                 bool wroteSomething = false;
377                 
378                 while(!in.eof()){
379                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
380                         
381                         Sequence currSeq(in);
382                         name = currSeq.getName();
383                         
384                         if (name != "") {
385                                 //if this name is in the accnos file
386                                 if (names.count(name) == 0) {
387                                         wroteSomething = true;
388                                         
389                                         currSeq.printSequence(out);
390                                 }
391                         }
392                         m->gobble(in);
393                 }
394                 in.close();     
395                 out.close();
396                 
397                 if (wroteSomething == false) {  m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
398                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); 
399                 
400                 return 0;
401                 
402         }
403         catch(exception& e) {
404                 m->errorOut(e, "RemoveLineageCommand", "readFasta");
405                 exit(1);
406         }
407 }
408 //**********************************************************************************************************************
409 int RemoveLineageCommand::readList(){
410         try {
411                 string thisOutputDir = outputDir;
412                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
413                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);            
414                 ofstream out;
415                 m->openOutputFile(outputFileName, out);
416                 
417                 ifstream in;
418                 m->openInputFile(listfile, in);
419                 
420                 bool wroteSomething = false;
421                 
422                 while(!in.eof()){
423                         //read in list vector
424                         ListVector list(in);
425                         
426                         //make a new list vector
427                         ListVector newList;
428                         newList.setLabel(list.getLabel());
429                         
430                         //for each bin
431                         for (int i = 0; i < list.getNumBins(); i++) {
432                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
433                         
434                                 //parse out names that are in accnos file
435                                 string binnames = list.get(i);
436                                 
437                                 string newNames = "";
438                                 while (binnames.find_first_of(',') != -1) { 
439                                         string name = binnames.substr(0,binnames.find_first_of(','));
440                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
441                                         
442                                         //if that name is in the .accnos file, add it
443                                         if (names.count(name) == 0) {  newNames += name + ",";  }
444                                 }
445                         
446                                 //get last name
447                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
448
449                                 //if there are names in this bin add to new list
450                                 if (newNames != "") {  
451                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
452                                         newList.push_back(newNames);    
453                                 }
454                         }
455                                 
456                         //print new listvector
457                         if (newList.getNumBins() != 0) {
458                                 wroteSomething = true;
459                                 newList.print(out);
460                         }
461                         
462                         m->gobble(in);
463                 }
464                 in.close();     
465                 out.close();
466                 
467                 if (wroteSomething == false) {  m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
468                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); 
469                                 
470                 return 0;
471
472         }
473         catch(exception& e) {
474                 m->errorOut(e, "RemoveLineageCommand", "readList");
475                 exit(1);
476         }
477 }
478 //**********************************************************************************************************************
479 int RemoveLineageCommand::readName(){
480         try {
481                 string thisOutputDir = outputDir;
482                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
483                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
484                 ofstream out;
485                 m->openOutputFile(outputFileName, out);
486
487                 ifstream in;
488                 m->openInputFile(namefile, in);
489                 string name, firstCol, secondCol;
490                 
491                 bool wroteSomething = false;
492                 
493                 while(!in.eof()){
494                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
495
496                         in >> firstCol;                         
497                         in >> secondCol;                        
498
499                         vector<string> parsedNames;
500                         m->splitAtComma(secondCol, parsedNames);
501                         
502                         vector<string> validSecond;  validSecond.clear();
503                         for (int i = 0; i < parsedNames.size(); i++) {
504                                 if (names.count(parsedNames[i]) == 0) {
505                                         validSecond.push_back(parsedNames[i]);
506                                 }
507                         }
508                         
509                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
510                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
511                         }else {
512                                         //if the name in the first column is in the set then print it and any other names in second column also in set
513                                 if (names.count(firstCol) == 0) {
514                                         
515                                         wroteSomething = true;
516                                         
517                                         out << firstCol << '\t';
518                                         
519                                         //you know you have at least one valid second since first column is valid
520                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
521                                         out << validSecond[validSecond.size()-1] << endl;
522                                         
523                                         //make first name in set you come to first column and then add the remaining names to second column
524                                 }else {
525                                         
526                                         //you want part of this row
527                                         if (validSecond.size() != 0) {
528                                                 
529                                                 wroteSomething = true;
530                                                 
531                                                 out << validSecond[0] << '\t';
532                                                 
533                                                 //you know you have at least one valid second since first column is valid
534                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
535                                                 out << validSecond[validSecond.size()-1] << endl;
536                                         }
537                                 }
538                         }
539                         m->gobble(in);
540                 }
541                 in.close();
542                 out.close();
543
544                 if (wroteSomething == false) {  m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
545                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
546                                 
547                 return 0;
548         }
549         catch(exception& e) {
550                 m->errorOut(e, "RemoveLineageCommand", "readName");
551                 exit(1);
552         }
553 }
554 //**********************************************************************************************************************
555 int RemoveLineageCommand::readCount(){
556         try {
557                 string thisOutputDir = outputDir;
558                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
559                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
560                 
561                 ofstream out;
562                 m->openOutputFile(outputFileName, out);
563                 
564                 ifstream in;
565                 m->openInputFile(countfile, in);
566                 
567                 bool wroteSomething = false;
568                 
569         string headers = m->getline(in); m->gobble(in);
570         out << headers << endl;
571         
572         string name, rest; int thisTotal;
573         while (!in.eof()) {
574             
575             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
576             
577             in >> name; m->gobble(in); 
578             in >> thisTotal; m->gobble(in);
579             rest = m->getline(in); m->gobble(in);
580             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
581             
582             if (names.count(name) == 0) {
583                 out << name << '\t' << thisTotal << '\t' << rest << endl;
584                 wroteSomething = true;
585             }
586         }
587         in.close();
588                 out.close();
589         
590         //check for groups that have been eliminated
591         CountTable ct;
592         if (ct.testGroups(outputFileName)) {
593             ct.readTable(outputFileName);
594             ct.printTable(outputFileName);
595         }
596                 
597                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
598                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
599         
600                 return 0;
601         }
602         catch(exception& e) {
603                 m->errorOut(e, "RemoveLineageCommand", "readCount");
604                 exit(1);
605         }
606 }
607 //**********************************************************************************************************************
608 int RemoveLineageCommand::readGroup(){
609         try {
610                 string thisOutputDir = outputDir;
611                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
612                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);         
613                 ofstream out;
614                 m->openOutputFile(outputFileName, out);
615
616                 ifstream in;
617                 m->openInputFile(groupfile, in);
618                 string name, group;
619                 
620                 bool wroteSomething = false;
621                 
622                 while(!in.eof()){
623                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
624                         
625                         in >> name;                             //read from first column
626                         in >> group;                    //read from second column
627                         
628                         //if this name is in the accnos file
629                         if (names.count(name) == 0) {
630                                 wroteSomething = true;
631                                 out << name << '\t' << group << endl;
632                         }
633                                         
634                         m->gobble(in);
635                 }
636                 in.close();
637                 out.close();
638                 
639                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
640                 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
641                 
642                 return 0;
643         }
644         catch(exception& e) {
645                 m->errorOut(e, "RemoveLineageCommand", "readGroup");
646                 exit(1);
647         }
648 }
649 //**********************************************************************************************************************
650 int RemoveLineageCommand::readTax(){
651         try {
652                 string thisOutputDir = outputDir;
653                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
654                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
655                 ofstream out;
656                 m->openOutputFile(outputFileName, out);
657                 
658                 ifstream in;
659                 m->openInputFile(taxfile, in);
660                 string name, tax;
661                 
662                 bool wroteSomething = false;
663                 
664                 vector<bool> taxonsHasConfidence; taxonsHasConfidence.resize(listOfTaxons.size(), false);
665                 vector< vector< map<string, float> > > searchTaxons; searchTaxons.resize(listOfTaxons.size());
666                 vector<string> noConfidenceTaxons; noConfidenceTaxons.resize(listOfTaxons.size(), "");
667                 
668                 for (int i = 0; i < listOfTaxons.size(); i++) {
669                         noConfidenceTaxons[i] = listOfTaxons[i];
670                         int hasConPos = listOfTaxons[i].find_first_of('(');
671                         if (hasConPos != string::npos) {  
672                                 taxonsHasConfidence[i] = true; 
673                                 searchTaxons[i] = getTaxons(listOfTaxons[i]); 
674                                 noConfidenceTaxons[i] = listOfTaxons[i];
675                                 m->removeConfidences(noConfidenceTaxons[i]);
676                         }
677                 }
678                 
679                 
680                 while(!in.eof()){
681
682                         if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }
683
684                         in >> name;                             //read from first column
685                         in >> tax;                      //read from second column
686                         
687                         bool remove = false;
688                         
689             string noQuotesTax = m->removeQuotes(tax);
690             
691                         for (int j = 0; j < listOfTaxons.size(); j++) {
692                                 string newtax = noQuotesTax;
693                                 
694                                 //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
695                                 if (!taxonsHasConfidence[j]) {
696                                         
697                                         int hasConfidences = noQuotesTax.find_first_of('(');
698                                         if (hasConfidences != string::npos) { 
699                                                 newtax = noQuotesTax;
700                                                 m->removeConfidences(newtax);
701                                         }
702                                         
703                                         int pos = newtax.find(noConfidenceTaxons[j]);
704                                         
705                                         if (pos == string::npos) { 
706                                                 //wroteSomething = true;
707                                                 //out << name << '\t' << tax << endl;
708                                         }else{ //this sequence contains the taxon the user wants to remove
709                                                 names.insert(name);
710                                                 remove=true; break;
711                                         }
712                                         
713                                 }else{//if taxons has them and you don't them remove taxons
714                                         int hasConfidences = noQuotesTax.find_first_of('(');
715                                         if (hasConfidences == string::npos) { 
716                                                 
717                                                 int pos = newtax.find(noConfidenceTaxons[j]);
718                                                 
719                                                 if (pos == string::npos) { 
720                                                         //wroteSomething = true;
721                                                         //out << name << '\t' << tax << endl;
722                                                 }else{ //this sequence contains the taxon the user wants to remove
723                                                         names.insert(name);
724                                                         remove=true; break;
725                                                 }
726                                         }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
727                                                 //first remove confidences from both and see if the taxonomy exists
728                                                 
729                                                 string noNewTax = noQuotesTax;
730                                                 int hasConfidences = noQuotesTax.find_first_of('(');
731                                                 if (hasConfidences != string::npos) { 
732                                                         noNewTax = noQuotesTax;
733                                                         m->removeConfidences(noNewTax);
734                                                 }
735                                                 
736                                                 int pos = noNewTax.find(noConfidenceTaxons[j]);
737                                                 
738                                                 if (pos != string::npos) { //if yes, then are the confidences okay
739                                                         
740                                                         vector< map<string, float> > usersTaxon = getTaxons(newtax);
741                                                         
742                                                         //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
743                                                         //we want to "line them up", so we will find the the index where the searchstring starts
744                                                         int index = 0;
745                                                         for (int i = 0; i < usersTaxon.size(); i++) {
746                                                                 
747                                                                 if (usersTaxon[i].begin()->first == searchTaxons[j][0].begin()->first) { 
748                                                                         index = i;  
749                                                                         int spot = 0;
750                                                                         bool goodspot = true;
751                                                                         //is this really the start, or are we dealing with a taxon of the same name?
752                                                                         while ((spot < searchTaxons[j].size()) && ((i+spot) < usersTaxon.size())) {
753                                                                                 if (usersTaxon[i+spot].begin()->first != searchTaxons[j][spot].begin()->first) { goodspot = false; break; }
754                                                                                 else { spot++; }
755                                                                         }
756                                                                         
757                                                                         if (goodspot) { break; }
758                                                                 }
759                                                         }
760                                                         
761                                                         for (int i = 0; i < searchTaxons[j].size(); i++) {
762                                                                 
763                                                                 if ((i+index) < usersTaxon.size()) { //just in case, should never be false
764                                                                         if (usersTaxon[i+index].begin()->second < searchTaxons[j][i].begin()->second) { //is the users cutoff less than the search taxons
765                                                                                 remove = true;
766                                                                                 break;
767                                                                         }
768                                                                 }else {
769                                                                         remove = true;
770                                                                         break;
771                                                                 }
772                                                         }
773                                                         
774                                                         //passed the test so remove you
775                                                         if (remove) {
776                                                                 names.insert(name);
777                                                                 remove=true; break;
778                                                         }else {
779                                                                 //wroteSomething = true;
780                                                                 //out << name << '\t' << tax << endl;
781                                                         }
782                                                 }else {
783                                                         //wroteSomething = true;
784                                                         //out << name << '\t' << tax << endl;
785                                                 }
786                                         }
787                                 }
788                                 
789                         }
790                         
791                         if (!remove) {  wroteSomething = true; out << name << '\t' << tax << endl; }
792                         m->gobble(in);
793                 }
794                 in.close();
795                 out.close();
796                 
797                 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
798                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
799                         
800                 return 0;
801
802         }
803         catch(exception& e) {
804                 m->errorOut(e, "RemoveLineageCommand", "readTax");
805                 exit(1);
806         }
807 }
808 /**************************************************************************************************/
809 vector< map<string, float> > RemoveLineageCommand::getTaxons(string tax) {
810         try {
811                 
812                 vector< map<string, float> > t;
813                 string taxon = "";
814                 int taxLength = tax.length();
815                 for(int i=0;i<taxLength;i++){
816                         if(tax[i] == ';'){
817                                 
818                                 int openParen = taxon.find_first_of('(');
819                                 int closeParen = taxon.find_last_of(')');
820                                 
821                                 string newtaxon, confidence;
822                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
823                                         newtaxon = taxon.substr(0, openParen); //rip off confidence
824                                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
825                                 }else{
826                                         newtaxon = taxon;
827                                         confidence = "0";
828                                 }
829                                 float con = 0;
830                                 convert(confidence, con);
831                                 
832                                 map<string, float> temp;
833                                 temp[newtaxon] = con;
834                                 t.push_back(temp);
835                                 
836                                 taxon = "";
837                         }
838                         else{
839                                 taxon += tax[i];
840                         }
841                 }
842                 
843                 return t;
844         }
845         catch(exception& e) {
846                 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
847                 exit(1);
848         }
849 }
850 //**********************************************************************************************************************
851 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
852 int RemoveLineageCommand::readAlign(){
853         try {
854                 string thisOutputDir = outputDir;
855                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
856                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + getOutputFileNameTag("alignreport");
857                 
858                 ofstream out;
859                 m->openOutputFile(outputFileName, out);
860
861                 ifstream in;
862                 m->openInputFile(alignfile, in);
863                 string name, junk;
864                 
865                 bool wroteSomething = false;
866                 
867                 //read column headers
868                 for (int i = 0; i < 16; i++) {  
869                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
870                         else                    {       break;                  }
871                 }
872                 out << endl;
873                 
874                 while(!in.eof()){
875                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
876                         
877                         in >> name;                             //read from first column
878                         
879                         //if this name is in the accnos file
880                         if (names.count(name) == 0) {
881                                 wroteSomething = true;
882                                 
883                                 out << name << '\t';
884                                 
885                                 //read rest
886                                 for (int i = 0; i < 15; i++) {  
887                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
888                                         else                    {       break;                  }
889                                 }
890                                 out << endl;
891                                 
892                         }else {//still read just don't do anything with it
893                                 
894                                 //read rest
895                                 for (int i = 0; i < 15; i++) {  
896                                         if (!in.eof())  {       in >> junk;             }
897                                         else                    {       break;                  }
898                                 }
899                         }
900                         
901                         m->gobble(in);
902                 }
903                 in.close();
904                 out.close();
905                 
906                 if (wroteSomething == false) {  m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
907                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
908                 
909                 return 0;
910                 
911         }
912         catch(exception& e) {
913                 m->errorOut(e, "RemoveLineageCommand", "readAlign");
914                 exit(1);
915         }
916 }
917 //**********************************************************************************************************************
918