2 * removeseqscommand.cpp
5 * Created by Sarah Westcott on 7/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "removeseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "counttable.h"
15 //**********************************************************************************************************************
// Registers every parameter accepted by remove.seqs in the `parameters` member and
// gathers the registered parameter names into a vector<string> (myArray).
// NOTE(review): the try/return/catch scaffolding is not visible in this listing;
// presumably myArray is what gets returned -- confirm against the full file.
16 vector<string> RemoveSeqsCommand::setParameters(){
// Input file parameters. All of these pass the same "FNGLT" argument
// (presumably a shared "at least one of" group -- confirm against CommandParameter).
18 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "FNGLT", "none","fastq",false,false,true); parameters.push_back(pfastq);
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false,true); parameters.push_back(pfasta);
// name/count and count/group are tagged "NameCount" / "CountGroup"; the option
// constructor rejects name+count and group+count combinations.
20 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
21 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "FNGLT", "none","count",false,false,true); parameters.push_back(pcount);
22 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "FNGLT", "none","group",false,false,true); parameters.push_back(pgroup);
23 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
24 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none","taxonomy",false,false,true); parameters.push_back(ptaxonomy);
25 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "FNGLT", "none","alignreport",false,false); parameters.push_back(palignreport);
26 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "FNGLT", "none","qfile",false,false); parameters.push_back(pqfile);
// accnos is the only input file declared outside the "FNGLT" group.
27 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(paccnos);
// dups is a Boolean option declared with "T" as its default value string.
28 CommandParameter pdups("dups", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pdups);
// Input/output directory overrides.
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Copy out just the names of everything registered above.
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Exception path: report with this class/method as context.
37 m->errorOut(e, "RemoveSeqsCommand", "setParameters");
41 //**********************************************************************************************************************
// Builds the user-facing help text for remove.seqs by appending one sentence at a
// time to helpString.
// NOTE(review): the return statement is not visible in this listing; presumably
// helpString is returned -- confirm against the full file.
42 string RemoveSeqsCommand::getHelpString(){
44 string helpString = "";
// What the command consumes and produces.
45 helpString += "The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, count, list, taxonomy, quality, fastq or alignreport file.\n";
46 helpString += "It outputs a file containing the sequences NOT in the .accnos file.\n";
// Parameter list and the meaning of dups.
47 helpString += "The remove.seqs command parameters are accnos, fasta, name, group, count, list, taxonomy, qfile, alignreport, fastq and dups. You must provide accnos and at least one of the file parameters.\n";
48 helpString += "The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=true. \n";
// Usage format, example and syntax note.
49 helpString += "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n";
50 helpString += "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n";
51 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
// Exception path: report with this class/method as context.
55 m->errorOut(e, "RemoveSeqsCommand", "getHelpString");
59 //**********************************************************************************************************************
// Maps an output type name to the filename pattern used to build that output's
// file name. Every type uses "[filename],pick,[extension]" except:
//   - list        : additionally embeds the [distance] label
//   - alignreport : uses the fixed suffix "pick.align.report"
// An unknown type logs an error and sets m->control_pressed.
// NOTE(review): the declaration and return of `pattern` are not visible in this
// listing -- confirm against the full file.
60 string RemoveSeqsCommand::getOutputPattern(string type) {
64 if (type == "fasta") { pattern = "[filename],pick,[extension]"; }
65 else if (type == "fastq") { pattern = "[filename],pick,[extension]"; }
66 else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; }
67 else if (type == "name") { pattern = "[filename],pick,[extension]"; }
68 else if (type == "group") { pattern = "[filename],pick,[extension]"; }
69 else if (type == "count") { pattern = "[filename],pick,[extension]"; }
70 else if (type == "list") { pattern = "[filename],[distance],pick,[extension]"; }
71 else if (type == "qfile") { pattern = "[filename],pick,[extension]"; }
72 else if (type == "alignreport") { pattern = "[filename],pick.align.report"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// Exception path: report under this class's own name (it previously named
// GetSeqsCommand, which pointed error reports at the wrong command).
78 m->errorOut(e, "RemoveSeqsCommand", "getOutputPattern");
82 //**********************************************************************************************************************
// Default constructor: marks the command as aborted/help-called and registers an
// empty output-file list for every output type the command can produce.
83 RemoveSeqsCommand::RemoveSeqsCommand(){
// Both flags are set, so execute() returns 0 immediately for this instance.
85 abort = true; calledHelp = true;
// One empty entry per output type.
87 vector<string> tempOutNames;
88 outputTypes["fasta"] = tempOutNames;
89 outputTypes["fastq"] = tempOutNames;
90 outputTypes["taxonomy"] = tempOutNames;
91 outputTypes["name"] = tempOutNames;
92 outputTypes["group"] = tempOutNames;
93 outputTypes["alignreport"] = tempOutNames;
94 outputTypes["list"] = tempOutNames;
95 outputTypes["qfile"] = tempOutNames;
96 outputTypes["count"] = tempOutNames;
// Exception path: report with this class/method as context.
99 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
103 //**********************************************************************************************************************
// Option-string constructor: parses and validates the user's options, resolves the
// input file names, and sets `abort` when the command cannot run.
//   option - "help", "citation", or the raw parameter string for the command.
// Steps: validate parameter names, initialise outputTypes, apply inputdir to files
// given without a path, open-check each file parameter, enforce the name/count and
// group/count exclusions, and require at least one data file besides accnos.
// NOTE(review): several structural lines (try, else, closing braces, the `path`
// declaration) are not visible in this listing -- confirm against the full file.
104 RemoveSeqsCommand::RemoveSeqsCommand(string option) {
106 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string,string> parameters = parser.getParameters();
118 ValidParameters validParameter;
119 map<string,string>::iterator it;
121 //check to make sure all parameters are valid for command
122 for (it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["fasta"] = tempOutNames;
129 outputTypes["fastq"] = tempOutNames;
130 outputTypes["taxonomy"] = tempOutNames;
131 outputTypes["name"] = tempOutNames;
132 outputTypes["group"] = tempOutNames;
133 outputTypes["alignreport"] = tempOutNames;
134 outputTypes["list"] = tempOutNames;
135 outputTypes["qfile"] = tempOutNames;
136 outputTypes["count"] = tempOutNames;
138 //if the user changes the output directory command factory will send this info to us in the output parameter
139 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
141 //if the user changes the input directory command factory will send this info to us in the output parameter
142 string inputDir = validParameter.validFile(parameters, "inputdir", false);
143 if (inputDir == "not found"){ inputDir = ""; }
// For each file parameter below: if the user supplied it without a path,
// prefix it with inputDir; a value that already has a path is left alone.
146 it = parameters.find("alignreport");
147 //user has given a template file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
154 it = parameters.find("fasta");
155 //user has given a template file
156 if(it != parameters.end()){
157 path = m->hasPath(it->second);
158 //if the user has not given a path then, add inputdir. else leave path alone.
159 if (path == "") { parameters["fasta"] = inputDir + it->second; }
162 it = parameters.find("accnos");
163 //user has given a template file
164 if(it != parameters.end()){
165 path = m->hasPath(it->second);
166 //if the user has not given a path then, add inputdir. else leave path alone.
167 if (path == "") { parameters["accnos"] = inputDir + it->second; }
170 it = parameters.find("list");
171 //user has given a template file
172 if(it != parameters.end()){
173 path = m->hasPath(it->second);
174 //if the user has not given a path then, add inputdir. else leave path alone.
175 if (path == "") { parameters["list"] = inputDir + it->second; }
178 it = parameters.find("name");
179 //user has given a template file
180 if(it != parameters.end()){
181 path = m->hasPath(it->second);
182 //if the user has not given a path then, add inputdir. else leave path alone.
183 if (path == "") { parameters["name"] = inputDir + it->second; }
186 it = parameters.find("group");
187 //user has given a template file
188 if(it != parameters.end()){
189 path = m->hasPath(it->second);
190 //if the user has not given a path then, add inputdir. else leave path alone.
191 if (path == "") { parameters["group"] = inputDir + it->second; }
194 it = parameters.find("taxonomy");
195 //user has given a template file
196 if(it != parameters.end()){
197 path = m->hasPath(it->second);
198 //if the user has not given a path then, add inputdir. else leave path alone.
199 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
202 it = parameters.find("qfile");
203 //user has given a template file
204 if(it != parameters.end()){
205 path = m->hasPath(it->second);
206 //if the user has not given a path then, add inputdir. else leave path alone.
207 if (path == "") { parameters["qfile"] = inputDir + it->second; }
210 it = parameters.find("count");
211 //user has given a template file
212 if(it != parameters.end()){
213 path = m->hasPath(it->second);
214 //if the user has not given a path then, add inputdir. else leave path alone.
215 if (path == "") { parameters["count"] = inputDir + it->second; }
218 it = parameters.find("fastq");
219 //user has given a template file
220 if(it != parameters.end()){
221 path = m->hasPath(it->second);
222 //if the user has not given a path then, add inputdir. else leave path alone.
223 if (path == "") { parameters["fastq"] = inputDir + it->second; }
228 //check for required parameters
// accnos is required: if not given, fall back to mothur's current accnos file.
229 accnosfile = validParameter.validFile(parameters, "accnos", true);
230 if (accnosfile == "not open") { abort = true; }
231 else if (accnosfile == "not found") {
232 accnosfile = m->getAccnosFile();
233 if (accnosfile != "") { m->mothurOut("Using " + accnosfile + " as input file for the accnos parameter."); m->mothurOutEndLine(); }
235 m->mothurOut("You have no valid accnos file and accnos is required."); m->mothurOutEndLine();
238 }else { m->setAccnosFile(accnosfile); }
// Optional inputs: "not open" aborts, "not found" means the parameter was not
// supplied, otherwise the file is recorded as mothur's current file of that type
// (alignreport and fastq have no such setter call here).
240 fastafile = validParameter.validFile(parameters, "fasta", true);
241 if (fastafile == "not open") { fastafile = ""; abort = true; }
242 else if (fastafile == "not found") { fastafile = ""; }
243 else { m->setFastaFile(fastafile); }
245 namefile = validParameter.validFile(parameters, "name", true);
246 if (namefile == "not open") { namefile = ""; abort = true; }
247 else if (namefile == "not found") { namefile = ""; }
248 else { m->setNameFile(namefile); }
250 groupfile = validParameter.validFile(parameters, "group", true);
251 if (groupfile == "not open") { abort = true; }
252 else if (groupfile == "not found") { groupfile = ""; }
253 else { m->setGroupFile(groupfile); }
255 alignfile = validParameter.validFile(parameters, "alignreport", true);
256 if (alignfile == "not open") { abort = true; }
257 else if (alignfile == "not found") { alignfile = ""; }
259 listfile = validParameter.validFile(parameters, "list", true);
260 if (listfile == "not open") { abort = true; }
261 else if (listfile == "not found") { listfile = ""; }
262 else { m->setListFile(listfile); }
264 taxfile = validParameter.validFile(parameters, "taxonomy", true);
265 if (taxfile == "not open") { abort = true; }
266 else if (taxfile == "not found") { taxfile = ""; }
267 else { m->setTaxonomyFile(taxfile); }
269 qualfile = validParameter.validFile(parameters, "qfile", true);
270 if (qualfile == "not open") { abort = true; }
271 else if (qualfile == "not found") { qualfile = ""; }
272 else { m->setQualFile(qualfile); }
274 fastqfile = validParameter.validFile(parameters, "fastq", true);
275 if (fastqfile == "not open") { abort = true; }
276 else if (fastqfile == "not found") { fastqfile = ""; }
// dups: when not supplied it is true if a name file was given, false otherwise
// (usedDups is cleared in the latter case).
278 string usedDups = "true";
279 string temp = validParameter.validFile(parameters, "dups", false);
280 if (temp == "not found") {
281 if (namefile != "") { temp = "true"; }
282 else { temp = "false"; usedDups = ""; }
284 dups = m->isTrue(temp);
286 countfile = validParameter.validFile(parameters, "count", true);
287 if (countfile == "not open") { countfile = ""; abort = true; }
288 else if (countfile == "not found") { countfile = ""; }
289 else { m->setCountTableFile(countfile); }
// A count file cannot be combined with a name file or a group file.
291 if ((namefile != "") && (countfile != "")) {
292 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
295 if ((groupfile != "") && (countfile != "")) {
296 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
// At least one data file is required. The message now lists count as well,
// since the guard tests countfile and a count file alone satisfies it.
299 if ((fastqfile == "") && (countfile == "") && (fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, count, taxonomy, quality, alignreport, fastq or list."); m->mothurOutEndLine(); abort = true; }
// With no count file, a fasta file given without a name file is handed to
// parser.getNameFile (presumably to locate a matching name file -- confirm).
301 if (countfile == "") {
302 if ((fastafile != "") && (namefile == "")) {
303 vector<string> files; files.push_back(fastafile);
304 parser.getNameFile(files);
310 catch(exception& e) {
311 m->errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
315 //**********************************************************************************************************************
// Runs the command: loads the accnos names, filters every supplied input file,
// lists the output files, and records the new outputs as mothur's current files.
// Returns 0 when help was called or the run was interrupted, 2 when aborted for
// any other reason.
// NOTE(review): the normal-completion return is not visible in this listing.
317 int RemoveSeqsCommand::execute(){
320 if (abort == true) { if (calledHelp) { return 0; } return 2; }
322 //get names you want to remove
323 names = m->readAccnos(accnosfile);
325 if (m->control_pressed) { return 0; }
// With a count file, warn that fasta/list/taxonomy inputs are assumed to hold uniques only.
327 if (countfile != "") {
328 if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
329 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
333 //read through the correct file and output lines you want to keep
// readName runs first: it can add names to `names` (dups) and fills uniqueMap,
// both of which the later readers consult.
334 if (namefile != "") { readName(); }
335 if (fastafile != "") { readFasta(); }
336 if (fastqfile != "") { readFastq(); }
337 if (groupfile != "") { readGroup(); }
338 if (alignfile != "") { readAlign(); }
339 if (listfile != "") { readList(); }
340 if (taxfile != "") { readTax(); }
341 if (qualfile != "") { readQual(); }
342 if (countfile != "") { readCount(); }
// Interrupted: delete everything written so far.
344 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
346 if (outputNames.size() != 0) {
347 m->mothurOutEndLine();
348 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
349 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
350 m->mothurOutEndLine();
352 //set fasta file as new current fastafile
// For each type below, the first registered output (if any) becomes the current file.
354 itTypes = outputTypes.find("fasta");
355 if (itTypes != outputTypes.end()) {
356 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
359 itTypes = outputTypes.find("name");
360 if (itTypes != outputTypes.end()) {
361 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
364 itTypes = outputTypes.find("group");
365 if (itTypes != outputTypes.end()) {
366 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
369 itTypes = outputTypes.find("list");
370 if (itTypes != outputTypes.end()) {
371 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
374 itTypes = outputTypes.find("taxonomy");
375 if (itTypes != outputTypes.end()) {
376 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
379 itTypes = outputTypes.find("qfile");
380 if (itTypes != outputTypes.end()) {
381 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
384 itTypes = outputTypes.find("count");
385 if (itTypes != outputTypes.end()) {
386 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
393 catch(exception& e) {
394 m->errorOut(e, "RemoveSeqsCommand", "execute");
399 //**********************************************************************************************************************
// Copies the fasta file to a ".pick" output, dropping every sequence whose name is
// in `names`, and reports how many sequences were removed.
// NOTE(review): the stream declarations, the read loop header and the final return
// are not visible in this listing.
400 int RemoveSeqsCommand::readFasta(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
402 string thisOutputDir = outputDir;
403 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
404 map<string, string> variables;
405 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
406 variables["[extension]"] = m->getExtension(fastafile);
407 string outputFileName = getOutputFileName("fasta", variables);
410 m->openOutputFile(outputFileName, out);
413 m->openInputFile(fastafile, in);
416 bool wroteSomething = false;
417 int removedCount = 0;
// Interrupted: close both streams and delete the partial output.
420 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
422 Sequence currSeq(in);
// With dups=false, readName may have renamed a unique (uniqueMap); apply the new name.
424 if (!dups) {//adjust name if needed
425 map<string, string>::iterator it = uniqueMap.find(currSeq.getName());
426 if (it != uniqueMap.end()) { currSeq.setName(it->second); }
429 name = currSeq.getName();
432 //if this name is NOT in the accnos file, keep the sequence
433 if (names.count(name) == 0) {
434 wroteSomething = true;
436 currSeq.printSequence(out);
437 }else { removedCount++; }
444 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
445 outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName);
447 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
452 catch(exception& e) {
453 m->errorOut(e, "RemoveSeqsCommand", "readFasta");
457 //**********************************************************************************************************************
// Copies the fastq file to a ".pick" output, dropping every record whose name is in
// `names`, and reports how many records were removed.
// A record is the '@' header line plus the three lines that follow it; the name is
// the first whitespace-delimited token of the header with the leading '@' removed.
// NOTE(review): the stream declarations, the read loop header, the write of
// outputString and the final return are not visible in this listing.
458 int RemoveSeqsCommand::readFastq(){
460 bool wroteSomething = false;
461 int removedCount = 0;
464 m->openInputFile(fastqfile, in);
// Output goes to outputDir, or next to the input file when outputDir is empty.
466 string thisOutputDir = outputDir;
467 if (outputDir == "") { thisOutputDir += m->hasPath(fastqfile); }
468 map<string, string> variables;
469 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastqfile));
470 variables["[extension]"] = m->getExtension(fastqfile);
471 string outputFileName = getOutputFileName("fastq", variables);
473 m->openOutputFile(outputFileName, out);
// Interrupted: close both streams and delete the partial output.
478 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
481 string input = m->getline(in); m->gobble(in);
483 string outputString = input + "\n";
485 if (input[0] == '@') {
// Collect the remaining three lines of this record.
487 outputString += m->getline(in) + "\n"; m->gobble(in);
488 outputString += m->getline(in) + "\n"; m->gobble(in);
489 outputString += m->getline(in) + "\n"; m->gobble(in);
491 vector<string> splits = m->splitWhiteSpace(input);
492 string name = splits[0];
493 name = name.substr(1);
// Keep the record only when its name is not in the accnos set.
496 if (names.count(name) == 0) {
497 wroteSomething = true;
499 }else { removedCount++; }
508 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
// Register under "fastq" (was "fasta"): otherwise execute() could set this fastq
// output as the current fasta file when no fasta input was given.
509 outputTypes["fastq"].push_back(outputFileName); outputNames.push_back(outputFileName);
511 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fastq file."); m->mothurOutEndLine();
517 catch(exception& e) {
518 m->errorOut(e, "RemoveSeqsCommand", "readFastq");
522 //**********************************************************************************************************************
// Copies the quality file to a ".pick" output, dropping every entry whose name is
// in `names`, and reports how many entries were removed.
// NOTE(review): the stream declarations, the loops that read the header line and
// the scores, and the final return are not visible in this listing; the comments
// below describe only the visible statements.
523 int RemoveSeqsCommand::readQual(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
525 string thisOutputDir = outputDir;
526 if (outputDir == "") { thisOutputDir += m->hasPath(qualfile); }
527 map<string, string> variables;
528 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(qualfile));
529 variables["[extension]"] = m->getExtension(qualfile);
530 string outputFileName = getOutputFileName("qfile", variables);
532 m->openOutputFile(outputFileName, out);
536 m->openInputFile(qualfile, in);
539 bool wroteSomething = false;
540 int removedCount = 0;
544 string saveName = "";
// saveName is the header token without its first character (presumably '>').
550 if (name.length() != 0) {
551 saveName = name.substr(1);
// Stop at line feed (10), carriage return (13) or -1 (presumably EOF from get()).
554 if (c == 10 || c == 13 || c == -1){ break; }
// Accumulate score characters until the next '>' is seen; that '>' is put back
// so the next entry's header can be read.
561 char letter= in.get();
562 if(letter == '>'){ in.putback(letter); break; }
563 else{ scores += letter; }
// With dups=false, readName may have renamed a unique (uniqueMap); apply the new name.
568 if (!dups) {//adjust name if needed
569 map<string, string>::iterator it = uniqueMap.find(saveName);
570 if (it != uniqueMap.end()) { name = ">" + it->second; saveName = it->second; }
// Keep the entry only when its name is not in the accnos set.
573 if (names.count(saveName) == 0) {
574 wroteSomething = true;
576 out << name << endl << scores;
577 }else { removedCount++; }
585 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
586 outputNames.push_back(outputFileName); outputTypes["qfile"].push_back(outputFileName);
588 m->mothurOut("Removed " + toString(removedCount) + " sequences from your quality file."); m->mothurOutEndLine();
593 catch(exception& e) {
594 m->errorOut(e, "RemoveSeqsCommand", "readQual");
598 //**********************************************************************************************************************
// Copies the count file to a ".pick" output, dropping every row whose name is in
// `names`. The removed total is the sum of the dropped rows' total counts, not the
// number of rows.
// NOTE(review): the stream declarations, the read loop header, the declaration of
// `ct` and the final return are not visible in this listing.
599 int RemoveSeqsCommand::readCount(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
602 string thisOutputDir = outputDir;
603 if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
604 map<string, string> variables;
605 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
606 variables["[extension]"] = m->getExtension(countfile);
607 string outputFileName = getOutputFileName("count", variables);
610 m->openOutputFile(outputFileName, out);
613 m->openInputFile(countfile, in);
615 bool wroteSomething = false;
616 int removedCount = 0;
// The header line is copied through unchanged.
618 string headers = m->getline(in); m->gobble(in);
619 out << headers << endl;
621 string name, rest; int thisTotal;
// Interrupted: close both streams and delete the partial output.
624 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
// Each row: name, total count, then the remainder of the line kept verbatim.
626 in >> name; m->gobble(in);
627 in >> thisTotal; m->gobble(in);
628 rest = m->getline(in); m->gobble(in);
629 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }
// Keep the row only when its name is not in the accnos set.
631 if (names.count(name) == 0) {
632 out << name << '\t' << thisTotal << '\t' << rest << endl;
633 wroteSomething = true;
634 }else { removedCount += thisTotal; }
639 //check for groups that have been eliminated
// When testGroups reports true, the output is re-read and re-printed through ct
// (presumably a CountTable that drops now-empty group columns -- confirm).
641 if (ct.testGroups(outputFileName)) {
642 ct.readTable(outputFileName, true, false);
643 ct.printTable(outputFileName);
647 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
648 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
650 m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
654 catch(exception& e) {
655 m->errorOut(e, "RemoveSeqsCommand", "readCount");
659 //**********************************************************************************************************************
// Filters the list file: for each list vector read, writes a ".pick" list file
// (named with that vector's label) whose bins contain only names not in `names`.
// Bins left empty are dropped together with their labels.
// NOTE(review): the stream declarations, the loop over list vectors, the
// list/newList declarations, the print of newList and the final return are not
// visible in this listing.
660 int RemoveSeqsCommand::readList(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
662 string thisOutputDir = outputDir;
663 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
664 map<string, string> variables;
665 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
666 variables["[extension]"] = m->getExtension(listfile);
669 m->openInputFile(listfile, in);
671 bool wroteSomething = false;
672 int removedCount = 0;
678 //read in list vector
681 //make a new list vector
683 newList.setLabel(list.getLabel());
// The output file name embeds this list vector's label as [distance].
685 variables["[distance]"] = list.getLabel();
686 string outputFileName = getOutputFileName("list", variables);
689 m->openOutputFile(outputFileName, out);
// Registered immediately after opening, before any bins are processed.
690 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
692 vector<string> binLabels = list.getLabels();
693 vector<string> newBinLabels;
695 if (m->control_pressed) { in.close(); out.close(); return 0; }
698 for (int i = 0; i < list.getNumBins(); i++) {
699 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
701 //parse out names that are in accnos file
702 string bin = list.get(i);
703 vector<string> bnames;
704 m->splitAtComma(bin, bnames);
706 string newNames = "";
707 for (int j = 0; j < bnames.size(); j++) {
708 string name = bnames[j];
709 //if that name is NOT in the .accnos file, keep it in the bin
710 if (names.count(name) == 0) { newNames += name + ","; }
711 else { removedCount++; }
714 //if there are names in this bin add to new list
715 if (newNames != "") {
716 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
717 newList.push_back(newNames);
718 newBinLabels.push_back(binLabels[i]);
722 //print new listvector
723 if (newList.getNumBins() != 0) {
724 wroteSomething = true;
725 newList.setLabels(newBinLabels);
726 newList.printHeaders(out);
737 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
739 m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
744 catch(exception& e) {
745 m->errorOut(e, "RemoveSeqsCommand", "readList");
749 //**********************************************************************************************************************
// Filters the name file into a ".pick" output.
//   dups == true : if any name on a line is in `names`, every name on that line is
//                  added to `names` and counted as removed.
//   otherwise    : only the listed names are dropped; if the first-column name is
//                  dropped but others remain, the first remaining name becomes the
//                  new first column and the rename is recorded in uniqueMap.
// NOTE(review): the stream declarations, the read loop header, the read of
// secondCol, several else/brace lines and the final return are not visible in this
// listing; branch structure described below is inferred from the visible lines.
750 int RemoveSeqsCommand::readName(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
752 string thisOutputDir = outputDir;
753 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
754 map<string, string> variables;
755 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
756 variables["[extension]"] = m->getExtension(namefile);
757 string outputFileName = getOutputFileName("name", variables);
759 m->openOutputFile(outputFileName, out);
762 m->openInputFile(namefile, in);
763 string name, firstCol, secondCol;
765 bool wroteSomething = false;
766 int removedCount = 0;
// Interrupted: close both streams and delete the partial output.
769 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
771 in >> firstCol; m->gobble(in);
// Split the comma-separated second column into individual names.
774 vector<string> parsedNames;
775 m->splitAtComma(secondCol, parsedNames);
// validSecond = the names on this line that are not being removed.
777 vector<string> validSecond; validSecond.clear();
778 for (int i = 0; i < parsedNames.size(); i++) {
779 if (names.count(parsedNames[i]) == 0) {
780 validSecond.push_back(parsedNames[i]);
784 if ((dups) && (validSecond.size() != parsedNames.size())) { //if dups is true and we want to get rid of anyone, get rid of everyone
785 for (int i = 0; i < parsedNames.size(); i++) { names.insert(parsedNames[i]); }
786 removedCount += parsedNames.size();
// Otherwise count only the names actually dropped from this line.
788 removedCount += parsedNames.size()-validSecond.size();
789 //if the name in the first column is NOT being removed, print it followed by the second-column names being kept
790 if (names.count(firstCol) == 0) {
792 wroteSomething = true;
794 out << firstCol << '\t';
796 //you know you have at least one valid second since first column is valid
797 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
798 out << validSecond[validSecond.size()-1] << endl;
800 //make first name in set you come to first column and then add the remaining names to second column
803 //you want part of this row
804 if (validSecond.size() != 0) {
806 wroteSomething = true;
808 out << validSecond[0] << '\t';
809 //we are changing the unique name in the fasta file
810 uniqueMap[firstCol] = validSecond[0];
812 //validSecond is non-empty here (checked just above), so size()-1 cannot underflow
813 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
814 out << validSecond[validSecond.size()-1] << endl;
823 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
824 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
826 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
830 catch(exception& e) {
831 m->errorOut(e, "RemoveSeqsCommand", "readName");
836 //**********************************************************************************************************************
// Copies the group file to a ".pick" output, dropping every name/group row whose
// name is in `names`, and reports how many rows were removed.
// NOTE(review): the stream declarations, the read loop header and the final return
// are not visible in this listing.
837 int RemoveSeqsCommand::readGroup(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
839 string thisOutputDir = outputDir;
840 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
841 map<string, string> variables;
842 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
843 variables["[extension]"] = m->getExtension(groupfile);
844 string outputFileName = getOutputFileName("group", variables);
846 m->openOutputFile(outputFileName, out);
849 m->openInputFile(groupfile, in);
852 bool wroteSomething = false;
853 int removedCount = 0;
// Interrupted: close both streams and delete the partial output.
856 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
858 in >> name; //read from first column
859 in >> group; //read from second column
861 //if this name is NOT in the accnos file, keep the row
862 if (names.count(name) == 0) {
863 wroteSomething = true;
864 out << name << '\t' << group << endl;
865 }else { removedCount++; }
872 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
873 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
875 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
880 catch(exception& e) {
881 m->errorOut(e, "RemoveSeqsCommand", "readGroup");
885 //**********************************************************************************************************************
// Copies the taxonomy file to a ".pick" output, dropping every name/taxonomy row
// whose name is in `names`, and reports how many rows were removed.
// NOTE(review): the stream declarations, the read loop header and the final return
// are not visible in this listing.
886 int RemoveSeqsCommand::readTax(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
888 string thisOutputDir = outputDir;
889 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
890 map<string, string> variables;
891 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
892 variables["[extension]"] = m->getExtension(taxfile);
893 string outputFileName = getOutputFileName("taxonomy", variables);
895 m->openOutputFile(outputFileName, out);
898 m->openInputFile(taxfile, in);
901 bool wroteSomething = false;
902 int removedCount = 0;
// Interrupted: close both streams and delete the partial output.
905 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
907 in >> name; m->gobble(in); //read from first column
908 in >> tax; //read from second column
// With dups=false, readName may have renamed a unique (uniqueMap); apply the new name.
910 if (!dups) {//adjust name if needed
911 map<string, string>::iterator it = uniqueMap.find(name);
912 if (it != uniqueMap.end()) { name = it->second; }
915 //if this name is NOT in the accnos file, keep the row
916 if (names.count(name) == 0) {
917 wroteSomething = true;
919 out << name << '\t' << tax << endl;
920 }else { removedCount++; }
927 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
928 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
930 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
934 catch(exception& e) {
935 m->errorOut(e, "RemoveSeqsCommand", "readTax");
939 //**********************************************************************************************************************
940 //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
// Copies the align report to a "pick.align.report" output, dropping every row whose
// first column (the name) is in `names`, and reports how many rows were removed.
// NOTE(review): the stream declarations, the read loop header, the write of the
// name column, line terminators and the final return are not visible in this
// listing.
941 int RemoveSeqsCommand::readAlign(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
// No [extension] variable is set: the alignreport pattern has a fixed suffix.
943 string thisOutputDir = outputDir;
944 if (outputDir == "") { thisOutputDir += m->hasPath(alignfile); }
945 map<string, string> variables;
946 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(alignfile));
947 string outputFileName = getOutputFileName("alignreport", variables);
950 m->openOutputFile(outputFileName, out);
953 m->openInputFile(alignfile, in);
956 bool wroteSomething = false;
957 int removedCount = 0;
959 //read column headers
// The 16 header tokens are copied through tab-separated.
960 for (int i = 0; i < 16; i++) {
961 if (!in.eof()) { in >> junk; out << junk << '\t'; }
// Interrupted: close both streams and delete the partial output.
967 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
969 in >> name; //read from first column
// With dups=false, readName may have renamed a unique (uniqueMap); apply the new name.
971 if (!dups) {//adjust name if needed
972 map<string, string>::iterator it = uniqueMap.find(name);
973 if (it != uniqueMap.end()) { name = it->second; }
976 //if this name is NOT in the accnos file, keep the row
977 if (names.count(name) == 0) {
978 wroteSomething = true;
// Copy the remaining 15 columns of this row.
983 for (int i = 0; i < 15; i++) {
984 if (!in.eof()) { in >> junk; out << junk << '\t'; }
989 }else {//still read just don't do anything with it
// Consume the remaining 15 columns so the stream stays aligned on rows.
993 for (int i = 0; i < 15; i++) {
994 if (!in.eof()) { in >> junk; }
1004 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine(); }
1005 outputTypes["alignreport"].push_back(outputFileName); outputNames.push_back(outputFileName);
1007 m->mothurOut("Removed " + toString(removedCount) + " sequences from your alignreport file."); m->mothurOutEndLine();
1013 catch(exception& e) {
1014 m->errorOut(e, "RemoveSeqsCommand", "readAlign");
1018 //**********************************************************************************************************************