2 * removeseqscommand.cpp
5 * Created by Sarah Westcott on 7/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "removeseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
14 //**********************************************************************************************************************
// Constructor: parses the option string of the remove.seqs command, checks that
// every supplied parameter name is allowed, resolves each file parameter and sets
// `abort` whenever something is wrong so that execute() does nothing.
// NOTE(review): this listing omits some original lines (the enclosing try/catch and
// the closing braces are not visible); the comments describe only what is shown.
16 RemoveSeqsCommand::RemoveSeqsCommand(string option){
20 //allow user to run help
21 if(option == "help") { help(); abort = true; }
24 //valid parameters for this command
25 string Array[] = {"fasta","name", "group", "alignreport", "accnos", "list" };
// copy the C array into a vector; sizeof(Array)/sizeof(string) is the element count
26 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
28 OptionParser parser(option);
29 map<string,string> parameters = parser.getParameters();
31 ValidParameters validParameter;
33 //check to make sure all parameters are valid for command
34 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
35 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
38 //check for required parameters
// accnos is mandatory: both "not open" and "not found" abort the command
39 accnosfile = validParameter.validFile(parameters, "accnos", true);
40 if (accnosfile == "not open") { abort = true; }
41 else if (accnosfile == "not found") { accnosfile = ""; mothurOut("You must provide an accnos file."); mothurOutEndLine(); abort = true; }
// the remaining file parameters are optional: "not found" just leaves the member
// empty, while "not open" aborts
43 fastafile = validParameter.validFile(parameters, "fasta", true);
44 if (fastafile == "not open") { abort = true; }
45 else if (fastafile == "not found") { fastafile = ""; }
47 namefile = validParameter.validFile(parameters, "name", true);
48 if (namefile == "not open") { abort = true; }
49 else if (namefile == "not found") { namefile = ""; }
51 groupfile = validParameter.validFile(parameters, "group", true);
52 if (groupfile == "not open") { abort = true; }
53 else if (groupfile == "not found") { groupfile = ""; }
55 alignfile = validParameter.validFile(parameters, "alignreport", true);
56 if (alignfile == "not open") { abort = true; }
57 else if (alignfile == "not found") { alignfile = ""; }
59 listfile = validParameter.validFile(parameters, "list", true);
60 if (listfile == "not open") { abort = true; }
61 else if (listfile == "not found") { listfile = ""; }
// at least one of the data files must have been given
63 if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "")) { mothurOut("You must provide one of the following: fasta, name, group, alignreport or list."); mothurOutEndLine(); abort = true; }
// more than two parameters means more than one data file besides accnos
65 if (parameters.size() > 2) { mothurOut("You may only enter one of the following: fasta, name, group, alignreport, or list."); mothurOutEndLine(); abort = true; }
// exception report tagged with class and function name; the catch clause that
// binds `e` is not visible in this listing
70 errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
74 //**********************************************************************************************************************
// Prints the usage text of the remove.seqs command through mothurOut: the accepted
// parameters, the required combination (accnos plus one other file) and an example.
// NOTE(review): the enclosing try/catch and closing braces are not visible in this listing.
76 void RemoveSeqsCommand::help(){
78 mothurOut("The remove.seqs command reads an .accnos file and one of the following file types: fasta, name, group, list or alignreport file.\n");
79 mothurOut("It outputs a file containing the sequences NOT in the .accnos file.\n");
80 mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list and alignreport. You must provide accnos and one of the other parameters.\n");
81 mothurOut("The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
82 mothurOut("Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
83 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
// exception report; the catch clause that binds `e` is not visible here
86 errorOut(e, "RemoveSeqsCommand", "help");
91 //**********************************************************************************************************************
// Runs the command: returns 0 immediately when the constructor set `abort`,
// otherwise filters whichever input file was supplied.
// NOTE(review): several original lines (try, final return, closing braces) are not
// visible in this listing.
93 int RemoveSeqsCommand::execute(){
96 if (abort == true) { return 0; }
98 //load the names from the accnos file, i.e. the sequences to be removed
// NOTE(review): the statement this comment introduces is missing from the listing -
// presumably a call to readAccnos(); confirm against the full file
101 //read through the correct file and output the lines that are not being removed
// only the first non-empty file parameter is processed, checked in this order
102 if (fastafile != "") { readFasta(); }
103 else if (namefile != "") { readName(); }
104 else if (groupfile != "") { readGroup(); }
105 else if (alignfile != "") { readAlign(); }
106 else if (listfile != "") { readList(); }
111 catch(exception& e) {
112 errorOut(e, "RemoveSeqsCommand", "execute");
117 //**********************************************************************************************************************
// Filters the fasta file: every sequence whose name is not in `names` is printed to
// the output file named getRootName(fastafile) + "pick" + getExtension(fastafile).
// NOTE(review): the stream declarations, the read loop header, the close calls and
// the closing braces are not visible in this listing.
118 void RemoveSeqsCommand::readFasta(){
120 string outputFileName = getRootName(fastafile) + "pick" + getExtension(fastafile);
122 openOutputFile(outputFileName, out);
125 openInputFile(fastafile, in);
// tracks whether any sequence survived the filter
128 bool wroteSomething = false;
// read the next sequence record from the input stream
131 Sequence currSeq(in);
132 name = currSeq.getName();
135 //keep the sequence only when its name is NOT in the accnos name set
136 if (names.count(name) == 0) {
137 wroteSomething = true;
139 currSeq.printSequence(out);
// a matched name is dropped from `names`; a later record carrying the same name
// would therefore no longer be filtered out
140 }else { names.erase(name); }
// nothing survived: tell the user and delete the output file
147 if (wroteSomething == false) {
148 mothurOut("Your file contains only sequences from the .accnos file."); mothurOutEndLine();
149 remove(outputFileName.c_str());
153 catch(exception& e) {
154 errorOut(e, "RemoveSeqsCommand", "readFasta");
158 //**********************************************************************************************************************
// Filters the list file: for each bin the comma-separated names are split, names
// found in `names` are dropped, and non-empty bins are added to a new list vector
// carrying the same label. Output goes to
// getRootName(listfile) + "pick" + getExtension(listfile).
// NOTE(review): the stream/list declarations, the outer read loop, the print call
// and the closing braces are not visible in this listing.
159 void RemoveSeqsCommand::readList(){
161 string outputFileName = getRootName(listfile) + "pick" + getExtension(listfile);
163 openOutputFile(outputFileName, out);
166 openInputFile(listfile, in);
168 bool wroteSomething = false;
171 //read in list vector
174 //make a new list vector
176 newList.setLabel(list.getLabel());
179 for (int i = 0; i < list.getNumBins(); i++) {
181 //split the bin and drop the names that are in the accnos file
182 string binnames = list.get(i);
184 string newNames = "";
// NOTE(review): find_first_of returns string::npos when nothing is found; the
// comparison with -1 relies on -1 converting to that value
185 while (binnames.find_first_of(',') != -1) {
186 string name = binnames.substr(0,binnames.find_first_of(','));
187 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
189 //if that name is NOT in the .accnos file, keep it
190 if (names.count(name) == 0) { newNames += name + ","; }
// what is left in binnames is the name after the last comma
// NOTE(review): when that last name is removed but earlier ones were kept,
// newNames ends with a trailing ',' - confirm whether this is handled elsewhere
194 if (names.count(binnames) == 0) { newNames += binnames; }
196 //if there are names in this bin add to new list
197 if (newNames != "") { newList.push_back(newNames); }
200 //print new listvector
201 if (newList.getNumBins() != 0) {
202 wroteSomething = true;
// nothing survived: tell the user and delete the output file
211 if (wroteSomething == false) {
212 mothurOut("Your file contains only sequences from the .accnos file."); mothurOutEndLine();
213 remove(outputFileName.c_str());
217 catch(exception& e) {
218 errorOut(e, "RemoveSeqsCommand", "readList");
222 //**********************************************************************************************************************
// Filters the name file. Each row has a first-column name and a comma-separated
// second column; names present in `names` are removed from the second column, and
// when the first-column name itself is removed the first surviving name takes its
// place. Output goes to getRootName(namefile) + "pick" + getExtension(namefile).
// NOTE(review): the stream declarations, the read loop with the reads of
// firstCol/secondCol, the `else` line and the closing braces are not visible here.
223 void RemoveSeqsCommand::readName(){
226 string outputFileName = getRootName(namefile) + "pick" + getExtension(namefile);
229 openOutputFile(outputFileName, out);
232 openInputFile(namefile, in);
233 string name, firstCol, secondCol;
235 bool wroteSomething = false;
242 vector<string> parsedNames;
243 //parse second column saving each name
// NOTE(review): the comparison with -1 relies on -1 converting to string::npos
244 while (secondCol.find_first_of(',') != -1) {
245 name = secondCol.substr(0,secondCol.find_first_of(','));
246 secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
247 parsedNames.push_back(name);
251 //get name after last ,
252 parsedNames.push_back(secondCol);
// validSecond = the second-column names that are NOT in the accnos name set
254 vector<string> validSecond; validSecond.clear();
255 for (int i = 0; i < parsedNames.size(); i++) {
256 if (names.count(parsedNames[i]) == 0) {
257 validSecond.push_back(parsedNames[i]);
261 //if the first-column name is NOT being removed, print it followed by the surviving second-column names
262 if (names.count(firstCol) == 0) {
264 wroteSomething = true;
266 out << firstCol << '\t';
268 //assumes the first-column name also appears in the second column, so validSecond is non-empty
// NOTE(review): if that assumption fails validSecond is empty and size()-1 wraps
// around (size() is unsigned), so the loop and the index below would be invalid
269 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
270 out << validSecond[validSecond.size()-1] << endl;
272 //otherwise the first surviving name becomes the first column and all surviving names form the second column
275 //you want part of this row
276 if (validSecond.size() != 0) {
278 wroteSomething = true;
280 out << validSecond[0] << '\t';
282 //validSecond is non-empty here because of the size check above
283 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
284 out << validSecond[validSecond.size()-1] << endl;
// nothing survived: tell the user and delete the output file
293 if (wroteSomething == false) {
294 mothurOut("Your file contains only sequences from the .accnos file."); mothurOutEndLine();
295 remove(outputFileName.c_str());
299 catch(exception& e) {
300 errorOut(e, "RemoveSeqsCommand", "readName");
305 //**********************************************************************************************************************
// Filters the group file: each row is a name and a group; rows whose name is not in
// `names` are written, tab-separated, to
// getRootName(groupfile) + "pick" + getExtension(groupfile).
// NOTE(review): the stream/variable declarations, the read loop header, the close
// calls and the closing braces are not visible in this listing.
306 void RemoveSeqsCommand::readGroup(){
309 string outputFileName = getRootName(groupfile) + "pick" + getExtension(groupfile);
311 openOutputFile(outputFileName, out);
314 openInputFile(groupfile, in);
// tracks whether any row survived the filter
317 bool wroteSomething = false;
321 in >> name; //read from first column
322 in >> group; //read from second column
324 //keep the row only when its name is NOT in the accnos name set
325 if (names.count(name) == 0) {
326 wroteSomething = true;
327 out << name << '\t' << group << endl;
// a matched name is dropped from `names`; a later row carrying the same name would
// therefore no longer be filtered out
328 }else { names.erase(name); }
// nothing survived: tell the user and delete the output file
335 if (wroteSomething == false) {
336 mothurOut("Your file contains only sequences from the .accnos file."); mothurOutEndLine();
337 remove(outputFileName.c_str());
341 catch(exception& e) {
342 errorOut(e, "RemoveSeqsCommand", "readGroup");
347 //**********************************************************************************************************************
348 //An alignreport file has a column-header line; every other line contains 16 columns. Only the first column matters here because it holds the sequence name.
// Filters the alignreport file: the 16 header tokens are copied to the output, then
// for each row the name is read and the remaining 15 columns are either copied
// (name not in `names`) or read and discarded. Output goes to
// getRootName(getRootName(alignfile)) + "pick.align.report".
// NOTE(review): the stream/variable declarations, the read loop header, the write of
// the name itself, line endings and the closing braces are not visible here.
349 void RemoveSeqsCommand::readAlign(){
351 string outputFileName = getRootName(getRootName(alignfile)) + "pick.align.report";
353 openOutputFile(outputFileName, out);
356 openInputFile(alignfile, in);
359 bool wroteSomething = false;
361 //read column headers
362 for (int i = 0; i < 16; i++) {
363 if (!in.eof()) { in >> junk; out << junk << '\t'; }
370 in >> name; //read from first column
372 //keep the row only when its name is NOT in the accnos name set
373 if (names.count(name) == 0) {
374 wroteSomething = true;
// copy the remaining 15 columns of this row, tab-separated
379 for (int i = 0; i < 15; i++) {
380 if (!in.eof()) { in >> junk; out << junk << '\t'; }
385 }else {//still read just don't do anything with it
// consume the remaining 15 columns so the stream is positioned at the next row
389 for (int i = 0; i < 15; i++) {
390 if (!in.eof()) { in >> junk; }
// nothing survived: tell the user and delete the output file
400 if (wroteSomething == false) {
401 mothurOut("Your file contains only sequences from the .accnos file."); mothurOutEndLine();
402 remove(outputFileName.c_str());
406 catch(exception& e) {
407 errorOut(e, "RemoveSeqsCommand", "readAlign");
411 //**********************************************************************************************************************
// Opens the accnos file for reading.
// NOTE(review): almost the whole body (stream declaration, read loop, close call) is
// missing from this listing; presumably it fills `names` with one entry per name in
// the file - confirm against the full source.
412 void RemoveSeqsCommand::readAccnos(){
416 openInputFile(accnosfile, in);
429 catch(exception& e) {
430 errorOut(e, "RemoveSeqsCommand", "readAccnos");
435 //**********************************************************************************************************************