]> git.donarmstrong.com Git - mothur.git/blob - removelineagecommand.cpp
mods in testing 1.16.0
[mothur.git] / removelineagecommand.cpp
1 /*
2  *  removelineagecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/24/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removelineagecommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13
14 //**********************************************************************************************************************
15 vector<string> RemoveLineageCommand::getValidParameters(){      
16         try {
17                 string Array[] =  {"fasta","name", "group", "alignreport", "taxon", "dups", "list","taxonomy","outputdir","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "RemoveLineageCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 RemoveLineageCommand::RemoveLineageCommand(){   
28         try {
29                 abort = true;
30                 //initialize outputTypes
31                 vector<string> tempOutNames;
32                 outputTypes["fasta"] = tempOutNames;
33                 outputTypes["taxonomy"] = tempOutNames;
34                 outputTypes["name"] = tempOutNames;
35                 outputTypes["group"] = tempOutNames;
36                 outputTypes["alignreport"] = tempOutNames;
37                 outputTypes["list"] = tempOutNames;
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
41                 exit(1);
42         }
43 }
44 //**********************************************************************************************************************
45 vector<string> RemoveLineageCommand::getRequiredParameters(){   
46         try {
47                 string Array[] =  {"taxonomy"};
48                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
49                 return myArray;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "RemoveLineageCommand", "getRequiredParameters");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 vector<string> RemoveLineageCommand::getRequiredFiles(){        
58         try {
59                 vector<string> myArray;
60                 return myArray;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "RemoveLineageCommand", "getRequiredFiles");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 RemoveLineageCommand::RemoveLineageCommand(string option)  {
69         try {
70                 abort = false;
71                                 
72                 //allow user to run help
73                 if(option == "help") { help(); abort = true; }
74                 
75                 else {
76                         //valid paramters for this command
77                         string Array[] =  {"fasta","name", "group", "alignreport", "taxon", "dups", "list","taxonomy","outputdir","inputdir"};
78                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
79                         
80                         OptionParser parser(option);
81                         map<string,string> parameters = parser.getParameters();
82                         
83                         ValidParameters validParameter;
84                         map<string,string>::iterator it;
85                         
86                         //check to make sure all parameters are valid for command
87                         for (it = parameters.begin(); it != parameters.end(); it++) { 
88                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
89                         }
90                         
91                         //initialize outputTypes
92                         vector<string> tempOutNames;
93                         outputTypes["fasta"] = tempOutNames;
94                         outputTypes["taxonomy"] = tempOutNames;
95                         outputTypes["name"] = tempOutNames;
96                         outputTypes["group"] = tempOutNames;
97                         outputTypes["alignreport"] = tempOutNames;
98                         outputTypes["list"] = tempOutNames;
99                         
100                         //if the user changes the output directory command factory will send this info to us in the output parameter 
101                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
102                         
103                         //if the user changes the input directory command factory will send this info to us in the output parameter 
104                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
105                         if (inputDir == "not found"){   inputDir = "";          }
106                         else {
107                                 string path;
108                                 it = parameters.find("alignreport");
109                                 //user has given a template file
110                                 if(it != parameters.end()){ 
111                                         path = m->hasPath(it->second);
112                                         //if the user has not given a path then, add inputdir. else leave path alone.
113                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
114                                 }
115                                 
116                                 it = parameters.find("fasta");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
122                                 }
123                                 
124                                 it = parameters.find("list");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
130                                 }
131                                 
132                                 it = parameters.find("name");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
138                                 }
139                                 
140                                 it = parameters.find("group");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
146                                 }
147                                 
148                                 it = parameters.find("taxonomy");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
154                                 }
155                         }
156
157                         
158                         //check for required parameters                 
159                         fastafile = validParameter.validFile(parameters, "fasta", true);
160                         if (fastafile == "not open") { abort = true; }
161                         else if (fastafile == "not found") {  fastafile = "";  }        
162                         
163                         namefile = validParameter.validFile(parameters, "name", true);
164                         if (namefile == "not open") { abort = true; }
165                         else if (namefile == "not found") {  namefile = "";  }  
166                         
167                         groupfile = validParameter.validFile(parameters, "group", true);
168                         if (groupfile == "not open") { abort = true; }
169                         else if (groupfile == "not found") {  groupfile = "";  }        
170                         
171                         alignfile = validParameter.validFile(parameters, "alignreport", true);
172                         if (alignfile == "not open") { abort = true; }
173                         else if (alignfile == "not found") {  alignfile = "";  }
174                         
175                         listfile = validParameter.validFile(parameters, "list", true);
176                         if (listfile == "not open") { abort = true; }
177                         else if (listfile == "not found") {  listfile = "";  }
178                         
179                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
180                         if (taxfile == "not open") { abort = true; }
181                         else if (taxfile == "not found") {  taxfile = ""; m->mothurOut("The taxonomy parameter is required for the get.lineage command."); m->mothurOutEndLine();  abort = true; }
182                         
183                         string usedDups = "true";
184                         string temp = validParameter.validFile(parameters, "dups", false);      
185                         if (temp == "not found") { 
186                                 if (namefile != "") {  temp = "true";                                   }
187                                 else                            {  temp = "false"; usedDups = "";       }
188                         }
189                         dups = m->isTrue(temp);
190                         
191                         taxons = validParameter.validFile(parameters, "taxon", false);  
192                         if (taxons == "not found") { taxons = "";  m->mothurOut("No taxons given, please correct."); m->mothurOutEndLine();  abort = true;  }
193                         else { 
194                                 //rip off quotes
195                                 if (taxons[0] == '\'') {  taxons = taxons.substr(1); }
196                                 if (taxons[(taxons.length()-1)] == '\'') {  taxons = taxons.substr(0, (taxons.length()-1)); }
197                         }
198                         
199                         
200                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
201                 
202                         if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
203
204                 }
205
206         }
207         catch(exception& e) {
208                 m->errorOut(e, "RemoveLineageCommand", "RemoveLineageCommand");
209                 exit(1);
210         }
211 }
212 //**********************************************************************************************************************
213
214 void RemoveLineageCommand::help(){
215         try {
216                 m->mothurOut("The remove.lineage command reads a taxonomy file and any of the following file types: fasta, name, group, list or alignreport file.\n");
217                 m->mothurOut("It outputs a file containing only the sequences from the taxonomy file that are not from the taxon you requested to be removed.\n");
218                 m->mothurOut("The remove.lineage command parameters are taxon, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide taxonomy and taxon.\n");
219                 m->mothurOut("The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n");
220                 m->mothurOut("The taxon parameter allows you to select the taxons you would like to remove.\n");
221                 m->mothurOut("You may enter your taxons with confidence scores, doing so will remove only those sequences that belong to the taxonomy and whose cofidence scores fall below the scores you give.\n");
222                 m->mothurOut("If they belong to the taxonomy and have confidences above those you provide the sequence will not be removed.\n");
223                 m->mothurOut("The remove.lineage command should be in the following format: remove.lineage(taxonomy=yourTaxonomyFile, taxon=yourTaxons).\n");
224                 m->mothurOut("Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon=Bacteria;Firmicutes;Bacilli;Lactobacillales;).\n");
225                 m->mothurOut("Note: If you are running mothur in script mode you must wrap the taxon in ' characters so mothur will ignore the ; in the taxon.\n");
226                 m->mothurOut("Example remove.lineage(taxonomy=amazon.silva.taxonomy, taxon='Bacteria;Firmicutes;Bacilli;Lactobacillales;').\n");
227                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
228         }
229         catch(exception& e) {
230                 m->errorOut(e, "RemoveLineageCommand", "help");
231                 exit(1);
232         }
233 }
234
235 //**********************************************************************************************************************
236
237 int RemoveLineageCommand::execute(){
238         try {
239                 
240                 if (abort == true) { return 0; }
241                 
242                 if (m->control_pressed) { return 0; }
243                 
244                 //read through the correct file and output lines you want to keep
245                 if (taxfile != "")                      {               readTax();              }  //fills the set of names to remove
246                 if (namefile != "")                     {               readName();             }
247                 if (fastafile != "")            {               readFasta();    }
248                 if (groupfile != "")            {               readGroup();    }
249                 if (alignfile != "")            {               readAlign();    }
250                 if (listfile != "")                     {               readList();             }
251                 
252                 
253                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  } return 0; }
254                 
255                 if (outputNames.size() != 0) {
256                         m->mothurOutEndLine();
257                         m->mothurOut("Output File Names: "); m->mothurOutEndLine();
258                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
259                         m->mothurOutEndLine();
260                 }
261                 
262                 return 0;               
263         }
264
265         catch(exception& e) {
266                 m->errorOut(e, "RemoveLineageCommand", "execute");
267                 exit(1);
268         }
269 }
270
271 //**********************************************************************************************************************
272 int RemoveLineageCommand::readFasta(){
273         try {
274                 string thisOutputDir = outputDir;
275                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
276                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
277                 
278                 ofstream out;
279                 m->openOutputFile(outputFileName, out);
280                 
281                 ifstream in;
282                 m->openInputFile(fastafile, in);
283                 string name;
284                 
285                 bool wroteSomething = false;
286                 
287                 while(!in.eof()){
288                         if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
289                         
290                         Sequence currSeq(in);
291                         name = currSeq.getName();
292                         
293                         if (name != "") {
294                                 //if this name is in the accnos file
295                                 if (names.count(name) == 0) {
296                                         wroteSomething = true;
297                                         
298                                         currSeq.printSequence(out);
299                                 }
300                         }
301                         m->gobble(in);
302                 }
303                 in.close();     
304                 out.close();
305                 
306                 if (wroteSomething == false) {  m->mothurOut("Your fasta file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
307                 outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); 
308                 
309                 return 0;
310                 
311         }
312         catch(exception& e) {
313                 m->errorOut(e, "RemoveLineageCommand", "readFasta");
314                 exit(1);
315         }
316 }
317 //**********************************************************************************************************************
318 int RemoveLineageCommand::readList(){
319         try {
320                 string thisOutputDir = outputDir;
321                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
322                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
323                 
324                 ofstream out;
325                 m->openOutputFile(outputFileName, out);
326                 
327                 ifstream in;
328                 m->openInputFile(listfile, in);
329                 
330                 bool wroteSomething = false;
331                 
332                 while(!in.eof()){
333                         //read in list vector
334                         ListVector list(in);
335                         
336                         //make a new list vector
337                         ListVector newList;
338                         newList.setLabel(list.getLabel());
339                         
340                         //for each bin
341                         for (int i = 0; i < list.getNumBins(); i++) {
342                                 if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
343                         
344                                 //parse out names that are in accnos file
345                                 string binnames = list.get(i);
346                                 
347                                 string newNames = "";
348                                 while (binnames.find_first_of(',') != -1) { 
349                                         string name = binnames.substr(0,binnames.find_first_of(','));
350                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
351                                         
352                                         //if that name is in the .accnos file, add it
353                                         if (names.count(name) == 0) {  newNames += name + ",";  }
354                                 }
355                         
356                                 //get last name
357                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
358
359                                 //if there are names in this bin add to new list
360                                 if (newNames != "") {  
361                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
362                                         newList.push_back(newNames);    
363                                 }
364                         }
365                                 
366                         //print new listvector
367                         if (newList.getNumBins() != 0) {
368                                 wroteSomething = true;
369                                 newList.print(out);
370                         }
371                         
372                         m->gobble(in);
373                 }
374                 in.close();     
375                 out.close();
376                 
377                 if (wroteSomething == false) {  m->mothurOut("Your list file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
378                 outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); 
379                                 
380                 return 0;
381
382         }
383         catch(exception& e) {
384                 m->errorOut(e, "RemoveLineageCommand", "readList");
385                 exit(1);
386         }
387 }
388 //**********************************************************************************************************************
389 int RemoveLineageCommand::readName(){
390         try {
391                 string thisOutputDir = outputDir;
392                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
393                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
394
395                 ofstream out;
396                 m->openOutputFile(outputFileName, out);
397
398                 ifstream in;
399                 m->openInputFile(namefile, in);
400                 string name, firstCol, secondCol;
401                 
402                 bool wroteSomething = false;
403                 
404                 while(!in.eof()){
405                         if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
406
407                         in >> firstCol;                         
408                         in >> secondCol;                        
409
410                         vector<string> parsedNames;
411                         //parse second column saving each name
412                         while (secondCol.find_first_of(',') != -1) { 
413                                 name = secondCol.substr(0,secondCol.find_first_of(','));
414                                 secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
415                                 parsedNames.push_back(name);
416                         }
417                         
418                         //get name after last ,
419                         parsedNames.push_back(secondCol);
420
421                         vector<string> validSecond;  validSecond.clear();
422                         for (int i = 0; i < parsedNames.size(); i++) {
423                                 if (names.count(parsedNames[i]) == 0) {
424                                         validSecond.push_back(parsedNames[i]);
425                                 }
426                         }
427                         
428                         if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
429                                 for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
430                         }else {
431                                         //if the name in the first column is in the set then print it and any other names in second column also in set
432                                 if (names.count(firstCol) == 0) {
433                                         
434                                         wroteSomething = true;
435                                         
436                                         out << firstCol << '\t';
437                                         
438                                         //you know you have at least one valid second since first column is valid
439                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
440                                         out << validSecond[validSecond.size()-1] << endl;
441                                         
442                                         //make first name in set you come to first column and then add the remaining names to second column
443                                 }else {
444                                         
445                                         //you want part of this row
446                                         if (validSecond.size() != 0) {
447                                                 
448                                                 wroteSomething = true;
449                                                 
450                                                 out << validSecond[0] << '\t';
451                                                 
452                                                 //you know you have at least one valid second since first column is valid
453                                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
454                                                 out << validSecond[validSecond.size()-1] << endl;
455                                         }
456                                 }
457                         }
458                         m->gobble(in);
459                 }
460                 in.close();
461                 out.close();
462
463                 if (wroteSomething == false) {  m->mothurOut("Your name file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
464                 outputNames.push_back(outputFileName); outputTypes["name"].push_back(outputFileName);
465                                 
466                 return 0;
467         }
468         catch(exception& e) {
469                 m->errorOut(e, "RemoveLineageCommand", "readName");
470                 exit(1);
471         }
472 }
473
474 //**********************************************************************************************************************
475 int RemoveLineageCommand::readGroup(){
476         try {
477                 string thisOutputDir = outputDir;
478                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
479                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
480                 
481                 ofstream out;
482                 m->openOutputFile(outputFileName, out);
483
484                 ifstream in;
485                 m->openInputFile(groupfile, in);
486                 string name, group;
487                 
488                 bool wroteSomething = false;
489                 
490                 while(!in.eof()){
491                         if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
492                         
493                         in >> name;                             //read from first column
494                         in >> group;                    //read from second column
495                         
496                         //if this name is in the accnos file
497                         if (names.count(name) == 0) {
498                                 wroteSomething = true;
499                                 out << name << '\t' << group << endl;
500                         }
501                                         
502                         m->gobble(in);
503                 }
504                 in.close();
505                 out.close();
506                 
507                 if (wroteSomething == false) {  m->mothurOut("Your group file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
508                 outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputFileName);
509                 
510                 return 0;
511         }
512         catch(exception& e) {
513                 m->errorOut(e, "RemoveLineageCommand", "readGroup");
514                 exit(1);
515         }
516 }
517 //**********************************************************************************************************************
518 int RemoveLineageCommand::readTax(){
519         try {
520                 string thisOutputDir = outputDir;
521                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
522                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
523                 ofstream out;
524                 m->openOutputFile(outputFileName, out);
525                 
526                 ifstream in;
527                 m->openInputFile(taxfile, in);
528                 string name, tax;
529                 
530                 bool wroteSomething = false;
531                 
532                 bool taxonsHasConfidence = false;
533                 vector< map<string, int> > searchTaxons;
534                 string noConfidenceTaxons = taxons;
535                 int hasConPos = taxons.find_first_of('(');
536                 if (hasConPos != string::npos) {  
537                         taxonsHasConfidence = true; 
538                         searchTaxons = getTaxons(taxons); 
539                         noConfidenceTaxons = removeConfidences(taxons);
540                 }
541                 
542                 
543                 while(!in.eof()){
544
545                         if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
546
547                         in >> name;                             //read from first column
548                         in >> tax;                      //read from second column
549                         
550                         string newtax = tax;
551                         
552                         //if the users file contains confidence scores we want to ignore them when searching for the taxons, unless the taxon has them
553                         if (!taxonsHasConfidence) {
554                                 
555                                 int hasConfidences = tax.find_first_of('(');
556                                 if (hasConfidences != string::npos) { 
557                                         newtax = removeConfidences(tax);
558                                 }
559                                 
560                                 int pos = newtax.find(taxons);
561                                 
562                                 if (pos == string::npos) { 
563                                         wroteSomething = true;
564                                         out << name << '\t' << tax << endl;
565                                 }else{ //this sequence contains the taxon the user wants to remove
566                                         names.insert(name);
567                                 }
568                                 
569                         }else{//if taxons has them and you don't them remove taxons
570                                 int hasConfidences = tax.find_first_of('(');
571                                 if (hasConfidences == string::npos) { 
572                                 
573                                         int pos = newtax.find(noConfidenceTaxons);
574                                         
575                                         if (pos == string::npos) { 
576                                                 wroteSomething = true;
577                                                 out << name << '\t' << tax << endl;
578                                         }else{ //this sequence contains the taxon the user wants to remove
579                                                 names.insert(name);
580                                         }
581                                 }else { //both have confidences so we want to make sure the users confidences are greater then or equal to the taxons
582                                         //first remove confidences from both and see if the taxonomy exists
583                                 
584                                         string noNewTax = tax;
585                                         int hasConfidences = tax.find_first_of('(');
586                                         if (hasConfidences != string::npos) { 
587                                                 noNewTax = removeConfidences(tax);
588                                         }
589                                         
590                                         int pos = noNewTax.find(noConfidenceTaxons);
591                                         
592                                         if (pos != string::npos) { //if yes, then are the confidences okay
593                                                 
594                                                 bool remove = false;
595                                                 vector< map<string, int> > usersTaxon = getTaxons(newtax);
596                                                 
597                                                 //the usersTaxon is most likely longer than the searchTaxons, and searchTaxon[0] may relate to userTaxon[4]
598                                                 //we want to "line them up", so we will find the the index where the searchstring starts
599                                                 int index = 0;
600                                                 for (int i = 0; i < usersTaxon.size(); i++) {
601                                                         
602                                                         if (usersTaxon[i].begin()->first == searchTaxons[0].begin()->first) { 
603                                                                 index = i;  
604                                                                 int spot = 0;
605                                                                 bool goodspot = true;
606                                                                 //is this really the start, or are we dealing with a taxon of the same name?
607                                                                 while ((spot < searchTaxons.size()) && ((i+spot) < usersTaxon.size())) {
608                                                                         if (usersTaxon[i+spot].begin()->first != searchTaxons[spot].begin()->first) { goodspot = false; break; }
609                                                                         else { spot++; }
610                                                                 }
611                                                                 
612                                                                 if (goodspot) { break; }
613                                                         }
614                                                 }
615                                                 
616                                                 for (int i = 0; i < searchTaxons.size(); i++) {
617                                                         
618                                                         if ((i+index) < usersTaxon.size()) { //just in case, should never be false
619                                                                 if (usersTaxon[i+index].begin()->second < searchTaxons[i].begin()->second) { //is the users cutoff less than the search taxons
620                                                                         remove = true;
621                                                                         break;
622                                                                 }
623                                                         }else {
624                                                                 remove = true;
625                                                                 break;
626                                                         }
627                                                 }
628                                                 
629                                                 //passed the test so remove you
630                                                 if (remove) {
631                                                         names.insert(name);
632                                                 }else {
633                                                         wroteSomething = true;
634                                                         out << name << '\t' << tax << endl;
635                                                 }
636                                         }else {
637                                                 wroteSomething = true;
638                                                 out << name << '\t' << tax << endl;
639                                         }
640                                 }
641                         }
642                         
643                         
644                         
645                         
646                         m->gobble(in);
647                 }
648                 in.close();
649                 out.close();
650                 
651                 if (!wroteSomething) { m->mothurOut("Your taxonomy file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
652                 outputNames.push_back(outputFileName); outputTypes["taxonomy"].push_back(outputFileName);
653                         
654                 return 0;
655
656         }
657         catch(exception& e) {
658                 m->errorOut(e, "RemoveLineageCommand", "readTax");
659                 exit(1);
660         }
661 }
662 /**************************************************************************************************/
663 vector< map<string, int> > RemoveLineageCommand::getTaxons(string tax) {
664         try {
665                 
666                 vector< map<string, int> > t;
667                 string taxon = "";
668                 int taxLength = tax.length();
669                 for(int i=0;i<taxLength;i++){
670                         if(tax[i] == ';'){
671                                 
672                                 int openParen = taxon.find_first_of('(');
673                                 int closeParen = taxon.find_last_of(')');
674                                 
675                                 string newtaxon, confidence;
676                                 if ((openParen != string::npos) && (closeParen != string::npos)) {
677                                         newtaxon = taxon.substr(0, openParen); //rip off confidence
678                                         confidence = taxon.substr((openParen+1), (closeParen-openParen-1));  
679                                 }else{
680                                         newtaxon = taxon;
681                                         confidence = "0";
682                                 }
683                                 int con = 0;
684                                 convert(confidence, con);
685                                 
686                                 map<string, int> temp;
687                                 temp[newtaxon] = con;
688                                 t.push_back(temp);
689                                 
690                                 taxon = "";
691                         }
692                         else{
693                                 taxon += tax[i];
694                         }
695                 }
696                 
697                 return t;
698         }
699         catch(exception& e) {
700                 m->errorOut(e, "RemoveLineageCommand", "getTaxons");
701                 exit(1);
702         }
703 }
704 /**************************************************************************************************/
705 string RemoveLineageCommand::removeConfidences(string tax) {
706         try {
707                 
708                 string taxon = "";
709                 int taxLength = tax.length();
710                 for(int i=0;i<taxLength;i++){
711                         if(tax[i] == ';'){
712                                 taxon = taxon.substr(0, taxon.find_first_of('(')); //rip off confidence
713                                 taxon += ";";
714                         }
715                         else{
716                                 taxon += tax[i];
717                         }
718                 }
719                                 
720                 return taxon;
721         }
722         catch(exception& e) {
723                 m->errorOut(e, "RemoveLineageCommand", "removeConfidences");
724                 exit(1);
725         }
726 }
727 //**********************************************************************************************************************
728 //alignreport file has a column header line then all other lines contain 16 columns.  we just want the first column since that contains the name
729 int RemoveLineageCommand::readAlign(){
730         try {
731                 string thisOutputDir = outputDir;
732                 if (outputDir == "") {  thisOutputDir += m->hasPath(alignfile);  }
733                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(alignfile)) + "pick.align.report";
734                 
735                 ofstream out;
736                 m->openOutputFile(outputFileName, out);
737
738                 ifstream in;
739                 m->openInputFile(alignfile, in);
740                 string name, junk;
741                 
742                 bool wroteSomething = false;
743                 
744                 //read column headers
745                 for (int i = 0; i < 16; i++) {  
746                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
747                         else                    {       break;                  }
748                 }
749                 out << endl;
750                 
751                 while(!in.eof()){
752                         if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
753                         
754                         in >> name;                             //read from first column
755                         
756                         //if this name is in the accnos file
757                         if (names.count(name) == 0) {
758                                 wroteSomething = true;
759                                 
760                                 out << name << '\t';
761                                 
762                                 //read rest
763                                 for (int i = 0; i < 15; i++) {  
764                                         if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
765                                         else                    {       break;                  }
766                                 }
767                                 out << endl;
768                                 
769                         }else {//still read just don't do anything with it
770                                 
771                                 //read rest
772                                 for (int i = 0; i < 15; i++) {  
773                                         if (!in.eof())  {       in >> junk;             }
774                                         else                    {       break;                  }
775                                 }
776                         }
777                         
778                         m->gobble(in);
779                 }
780                 in.close();
781                 out.close();
782                 
783                 if (wroteSomething == false) {  m->mothurOut("Your align file contains only sequences from " + taxons + "."); m->mothurOutEndLine();  }
784                 outputNames.push_back(outputFileName); outputTypes["alignreport"].push_back(outputFileName);
785                 
786                 return 0;
787                 
788         }
789         catch(exception& e) {
790                 m->errorOut(e, "RemoveLineageCommand", "readAlign");
791                 exit(1);
792         }
793 }
794 //**********************************************************************************************************************
795