5 * Created by Sarah Westcott on 7/8/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "getseqscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
14 //**********************************************************************************************************************
// Constructs the get.seqs command from the raw option string.
// Checks every supplied parameter name against the allowed list, resolves the
// accnos file (required) and the fasta/name/group/alignreport/list files
// (optional), and sets `abort` whenever something is invalid so that execute()
// does nothing.
//   option - raw option text; the literal "help" prints the usage text and aborts.
// NOTE(review): this extract is missing lines inside the function (e.g. the
// opening of the try block and whatever branch wraps the parsing code after the
// help check) - confirm control flow against the full file.
16 GetSeqsCommand::GetSeqsCommand(string option){
20 //allow user to run help
21 if(option == "help") { help(); abort = true; }
24 //valid parameters for this command
25 string Array[] = {"fasta","name", "group", "alignreport", "accnos", "list"};
// sizeof(Array)/sizeof(string) is the element count, so myArray holds all six names
26 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
28 OptionParser parser(option);
29 map<string,string> parameters = parser.getParameters();
31 ValidParameters validParameter;
33 //check to make sure all parameters are valid for command
34 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
// any parameter rejected by isValidParameter aborts the command
35 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
38 //check for required parameters
// accnos is mandatory: "not open" aborts, "not found" aborts with a message
39 accnosfile = validParameter.validFile(parameters, "accnos", true);
40 if (accnosfile == "not open") { abort = true; }
41 else if (accnosfile == "not found") { accnosfile = ""; mothurOut("You must provide an accnos file."); mothurOutEndLine(); abort = true; }
// the remaining files are optional: "not found" just clears the member,
// while "not open" still aborts
43 fastafile = validParameter.validFile(parameters, "fasta", true);
44 if (fastafile == "not open") { abort = true; }
45 else if (fastafile == "not found") { fastafile = ""; }
47 namefile = validParameter.validFile(parameters, "name", true);
48 if (namefile == "not open") { abort = true; }
49 else if (namefile == "not found") { namefile = ""; }
51 groupfile = validParameter.validFile(parameters, "group", true);
52 if (groupfile == "not open") { abort = true; }
53 else if (groupfile == "not found") { groupfile = ""; }
55 alignfile = validParameter.validFile(parameters, "alignreport", true);
56 if (alignfile == "not open") { abort = true; }
57 else if (alignfile == "not found") { alignfile = ""; }
59 listfile = validParameter.validFile(parameters, "list", true);
60 if (listfile == "not open") { abort = true; }
61 else if (listfile == "not found") { listfile = ""; }
// at least one of the five data files must have been supplied
63 if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "")) { mothurOut("You must provide one of the following: fasta, name, group, alignreport or listfile."); mothurOutEndLine(); abort = true; }
// more than two parameters in total means more than accnos plus one data file
65 if (parameters.size() > 2) { mothurOut("You may only enter one of the following: fasta, name, group, alignreport or listfile."); mothurOutEndLine(); abort = true; }
// NOTE(review): `e` is presumably bound by a catch clause not shown in this extract
70 errorOut(e, "GetSeqsCommand", "GetSeqsCommand");
74 //**********************************************************************************************************************
// Prints the usage text for get.seqs through mothurOut: accepted parameters,
// the required accnos file, the expected call format and an example.
76 void GetSeqsCommand::help(){
78 mothurOut("The get.seqs command reads an .accnos file and one of the following file types: fasta, name, group, list or alignreport file.\n");
79 mothurOut("It outputs a file containing only the sequences in the .accnos file.\n");
80 mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list and alignreport. You must provide accnos and one of the other parameters.\n");
81 mothurOut("The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
82 mothurOut("Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
83 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
// NOTE(review): `e` is presumably bound by a catch clause not shown in this extract
86 errorOut(e, "GetSeqsCommand", "help");
91 //**********************************************************************************************************************
// Runs the command: returns 0 immediately when the constructor flagged `abort`,
// otherwise filters exactly one input file, chosen by the first non-empty
// member in the order fasta, name, group, alignreport, list.
// NOTE(review): the statement under "get names you want to keep" and the
// normal-path return are not visible in this extract; presumably readAccnos()
// is called there to fill `names` - confirm against the full file.
93 int GetSeqsCommand::execute(){
96 if (abort == true) { return 0; }
98 //get names you want to keep
101 //read through the correct file and output lines you want to keep
102 if (fastafile != "") { readFasta(); }
103 else if (namefile != "") { readName(); }
104 else if (groupfile != "") { readGroup(); }
105 else if (alignfile != "") { readAlign(); }
106 else if (listfile != "") { readList(); }
111 catch(exception& e) {
112 errorOut(e, "GetSeqsCommand", "execute");
117 //**********************************************************************************************************************
// Filters the fasta file: each Sequence read from `in` whose name is present in
// `names` is printed to the output file; if nothing was written, a message is
// printed and the output file is deleted.
// NOTE(review): the declarations of `out`, `in`, `name` and the loop that drives
// the reads are not visible in this extract.
118 void GetSeqsCommand::readFasta(){
// output path is the input's root name + "pick" + the input's extension
120 string outputFileName = getRootName(fastafile) + "pick" + getExtension(fastafile);
122 openOutputFile(outputFileName, out);
125 openInputFile(fastafile, in);
128 bool wroteSomething = false;
131 Sequence currSeq(in);
132 name = currSeq.getName();
135 //if this name is in the accnos file
136 if (names.count(name) == 1) {
137 wroteSomething = true;
139 currSeq.printSequence(out);
// nothing matched: report it and delete the output file
149 if (wroteSomething == false) {
150 mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine();
151 remove(outputFileName.c_str());
155 catch(exception& e) {
156 errorOut(e, "GetSeqsCommand", "readFasta");
160 //**********************************************************************************************************************
// Filters the list file: for each bin of the list vector read from the input,
// keeps only the comma-separated names that are present in `names`, pushes the
// non-empty results into a new list vector carrying the same label, and marks
// the output as written when that new vector has at least one bin.
// If nothing was written, a message is printed and the output file is deleted.
// NOTE(review): the declarations of `list`/`newList`, the read loop and the
// statement that actually prints `newList` are not visible in this extract.
161 void GetSeqsCommand::readList(){
// output path is the input's root name + "pick" + the input's extension
163 string outputFileName = getRootName(listfile) + "pick" + getExtension(listfile);
165 openOutputFile(outputFileName, out);
168 openInputFile(listfile, in);
170 bool wroteSomething = false;
173 //read in list vector
176 //make a new list vector
178 newList.setLabel(list.getLabel());
181 for (int i = 0; i < list.getNumBins(); i++) {
183 //parse out names that are in accnos file
184 string binnames = list.get(i);
186 string newNames = "";
// peel off one name per iteration, up to the next comma.
// NOTE(review): find_first_of returns an unsigned size_type; comparing with -1
// only works because -1 converts to string::npos - prefer string::npos.
187 while (binnames.find_first_of(',') != -1) {
188 string name = binnames.substr(0,binnames.find_first_of(','));
189 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
191 //if that name is in the .accnos file, add it
192 if (names.count(name) == 1) { newNames += name + ","; }
// binnames now holds the text after the last comma (the final name of the bin).
// NOTE(review): when earlier names were kept but this last one is not, newNames
// is left ending in ',' unless a line not shown here trims it - confirm.
196 if (names.count(binnames) == 1) { newNames += binnames; }
198 //if there are names in this bin add to new list
199 if (newNames != "") { newList.push_back(newNames); }
202 //print new listvector
203 if (newList.getNumBins() != 0) {
204 wroteSomething = true;
// nothing matched: report it and delete the output file
213 if (wroteSomething == false) {
214 mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine();
215 remove(outputFileName.c_str());
219 catch(exception& e) {
220 errorOut(e, "GetSeqsCommand", "readList");
224 //**********************************************************************************************************************
// Filters the name file. Each row has a first-column name and a comma-separated
// second column. The second column is split into names, those present in
// `names` are collected in validSecond, and then:
//   - if the first-column name is in `names`, the row is written with the same
//     first column and the surviving second-column names;
//   - otherwise, if any second-column name survived, the first survivor becomes
//     the first column and all survivors (including it) form the second column.
// If nothing was written, a message is printed and the output file is deleted.
// NOTE(review): the read loop, the statements that fill firstCol/secondCol and
// the else separating the two output branches are not visible in this extract.
225 void GetSeqsCommand::readName(){
// output path is the input's root name + "pick" + the input's extension
228 string outputFileName = getRootName(namefile) + "pick" + getExtension(namefile);
230 openOutputFile(outputFileName, out);
233 openInputFile(namefile, in);
234 string name, firstCol, secondCol;
236 bool wroteSomething = false;
244 vector<string> parsedNames;
245 //parse second column saving each name
// NOTE(review): comparing find_first_of's unsigned result with -1 only works
// because -1 converts to string::npos - prefer string::npos.
246 while (secondCol.find_first_of(',') != -1) {
247 name = secondCol.substr(0,secondCol.find_first_of(','));
248 secondCol = secondCol.substr(secondCol.find_first_of(',')+1, secondCol.length());
249 parsedNames.push_back(name);
252 //get name after last ,
253 parsedNames.push_back(secondCol);
// keep only the second-column names that appear in the accnos set
255 vector<string> validSecond;
256 for (int i = 0; i < parsedNames.size(); i++) {
257 if (names.count(parsedNames[i]) == 1) {
258 validSecond.push_back(parsedNames[i]);
263 //if the name in the first column is in the set then print it and any other names in second column also in set
264 if (names.count(firstCol) == 1) {
266 wroteSomething = true;
268 out << firstCol << '\t';
270 //you know you have at least one valid second since first column is valid
// NOTE(review): the assumption above only holds if the first-column name is
// repeated in the second column. If validSecond is empty, size()-1 wraps around
// (size() is unsigned) and both the loop and the index below read out of bounds.
271 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
272 out << validSecond[validSecond.size()-1] << endl;
275 //make first name in set you come to first column and then add the remaining names to second column
277 //you want part of this row
278 if (validSecond.size() != 0) {
280 wroteSomething = true;
// the first surviving name is promoted to the first column
282 out << validSecond[0] << '\t';
284 //validSecond is non-empty here (checked above), so size()-1 is safe; validSecond[0] is written again as the first entry of the second column
285 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
286 out << validSecond[validSecond.size()-1] << endl;
// nothing matched: report it and delete the output file
295 if (wroteSomething == false) {
296 mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine();
297 remove(outputFileName.c_str());
301 catch(exception& e) {
302 errorOut(e, "GetSeqsCommand", "readName");
307 //**********************************************************************************************************************
// Filters the group file: reads whitespace-separated (name, group) pairs and
// writes "name<TAB>group" for every name present in `names`. If nothing was
// written, a message is printed and the output file is deleted.
// NOTE(review): the declarations of `out`, `in`, `name`, `group` and the read
// loop are not visible in this extract.
308 void GetSeqsCommand::readGroup(){
// output path is the input's root name + "pick" + the input's extension
311 string outputFileName = getRootName(groupfile) + "pick" + getExtension(groupfile);
313 openOutputFile(outputFileName, out);
316 openInputFile(groupfile, in);
319 bool wroteSomething = false;
323 in >> name; //read from first column
324 in >> group; //read from second column
326 //if this name is in the accnos file
327 if (names.count(name) == 1) {
328 wroteSomething = true;
330 out << name << '\t' << group << endl;
// nothing matched: report it and delete the output file
340 if (wroteSomething == false) {
341 mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine();
342 remove(outputFileName.c_str());
346 catch(exception& e) {
347 errorOut(e, "GetSeqsCommand", "readGroup");
352 //**********************************************************************************************************************
353 //alignreport file has a column header line then all other lines contain 16 columns. we just want the first column since that contains the name
// Filters the align report file: copies the 16 header tokens to the output,
// then for each row reads the name from the first column and either copies the
// remaining 15 tokens (name present in `names`) or reads and discards them.
// If no row matched, a message is printed and the output file is deleted.
// NOTE(review): the declarations of `out`, `in`, `name`, `junk`, the row loop
// and the statements that write the row's name and line ending are not visible
// in this extract.
354 void GetSeqsCommand::readAlign(){
// two getRootName calls strip two trailing extensions before "pick.align.report"
// is appended - presumably the input is named <root>.align.report; confirm
356 string outputFileName = getRootName(getRootName(alignfile)) + "pick.align.report";
358 openOutputFile(outputFileName, out);
361 openInputFile(alignfile, in);
364 bool wroteSomething = false;
366 //read column headers
// NOTE(review): eof() is tested before each extraction rather than after, so a
// failed read is not detected here and `junk` may be written unchanged - confirm.
367 for (int i = 0; i < 16; i++) {
368 if (!in.eof()) { in >> junk; out << junk << '\t'; }
375 in >> name; //read from first column
377 //if this name is in the accnos file
378 if (names.count(name) == 1) {
379 wroteSomething = true;
// copy the remaining 15 columns of the row
384 for (int i = 0; i < 15; i++) {
385 if (!in.eof()) { in >> junk; out << junk << '\t'; }
392 }else {//still read just don't do anything with it
// consume the remaining 15 columns so the next read starts at the next row
394 for (int i = 0; i < 15; i++) {
395 if (!in.eof()) { in >> junk; }
// nothing matched: report it and delete the output file
405 if (wroteSomething == false) {
406 mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine();
407 remove(outputFileName.c_str());
411 catch(exception& e) {
412 errorOut(e, "GetSeqsCommand", "readAlign");
416 //**********************************************************************************************************************
// Opens the accnos file for reading and reports any exception through errorOut.
// NOTE(review): the statements that consume the file are not visible in this
// extract; presumably they fill `names`, which the other read* methods query
// with names.count() - confirm against the full file.
418 void GetSeqsCommand::readAccnos(){
422 openInputFile(accnosfile, in);
435 catch(exception& e) {
436 errorOut(e, "GetSeqsCommand", "readAccnos");
441 //**********************************************************************************************************************