2 * screenseqscommand.cpp
5 * Created by Pat Schloss on 6/3/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 ScreenSeqsCommand::ScreenSeqsCommand(string option) {
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength",
25 "name", "group", "alignreport","outputdir","inputdir"};
26 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
28 OptionParser parser(option);
29 map<string,string> parameters = parser.getParameters();
31 ValidParameters validParameter;
32 map<string,string>::iterator it;
34 //check to make sure all parameters are valid for command
35 for (it = parameters.begin(); it != parameters.end(); it++) {
36 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
39 //if the user changes the input directory command factory will send this info to us in the output parameter
40 string inputDir = validParameter.validFile(parameters, "inputdir", false);
41 if (inputDir == "not found"){ inputDir = ""; }
44 it = parameters.find("fasta");
45 //user has given a template file
46 if(it != parameters.end()){
47 path = hasPath(it->second);
48 //if the user has not given a path then, add inputdir. else leave path alone.
49 if (path == "") { parameters["fasta"] = inputDir + it->second; }
52 it = parameters.find("group");
53 //user has given a template file
54 if(it != parameters.end()){
55 path = hasPath(it->second);
56 //if the user has not given a path then, add inputdir. else leave path alone.
57 if (path == "") { parameters["group"] = inputDir + it->second; }
60 it = parameters.find("name");
61 //user has given a template file
62 if(it != parameters.end()){
63 path = hasPath(it->second);
64 //if the user has not given a path then, add inputdir. else leave path alone.
65 if (path == "") { parameters["name"] = inputDir + it->second; }
68 it = parameters.find("alignreport");
69 //user has given a template file
70 if(it != parameters.end()){
71 path = hasPath(it->second);
72 //if the user has not given a path then, add inputdir. else leave path alone.
73 if (path == "") { parameters["alignreport"] = inputDir + it->second; }
77 //check for required parameters
78 fastafile = validParameter.validFile(parameters, "fasta", true);
79 if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the screen.seqs command."); m->mothurOutEndLine(); abort = true; }
80 else if (fastafile == "not open") { abort = true; }
82 groupfile = validParameter.validFile(parameters, "group", true);
83 if (groupfile == "not open") { abort = true; }
84 else if (groupfile == "not found") { groupfile = ""; }
86 namefile = validParameter.validFile(parameters, "name", true);
87 if (namefile == "not open") { abort = true; }
88 else if (namefile == "not found") { namefile = ""; }
90 alignreport = validParameter.validFile(parameters, "alignreport", true);
91 if (alignreport == "not open") { abort = true; }
92 else if (alignreport == "not found") { alignreport = ""; }
94 //if the user changes the output directory command factory will send this info to us in the output parameter
95 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
97 outputDir += hasPath(fastafile); //if user entered a file with a path then preserve it
100 //check for optional parameter and set defaults
101 // ...at some point should added some additional type checking...
103 temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; }
104 convert(temp, startPos);
106 temp = validParameter.validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; }
107 convert(temp, endPos);
109 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
110 convert(temp, maxAmbig);
112 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "-1"; }
113 convert(temp, maxHomoP);
115 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "-1"; }
116 convert(temp, minLength);
118 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "-1"; }
119 convert(temp, maxLength);
123 catch(exception& e) {
124 m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
128 //**********************************************************************************************************************
130 void ScreenSeqsCommand::help(){
132 m->mothurOut("The screen.seqs command reads a fastafile and creates .....\n");
133 m->mothurOut("The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, and group.\n");
134 m->mothurOut("The fasta parameter is required.\n");
135 m->mothurOut("The start parameter .... The default is -1.\n");
136 m->mothurOut("The end parameter .... The default is -1.\n");
137 m->mothurOut("The maxambig parameter .... The default is -1.\n");
138 m->mothurOut("The maxhomop parameter .... The default is -1.\n");
139 m->mothurOut("The minlength parameter .... The default is -1.\n");
140 m->mothurOut("The maxlength parameter .... The default is -1.\n");
141 m->mothurOut("The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile.\n");
142 m->mothurOut("The screen.seqs command should be in the following format: \n");
143 m->mothurOut("screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig, \n");
144 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n");
145 m->mothurOut("Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
146 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
149 catch(exception& e) {
150 m->errorOut(e, "ScreenSeqsCommand", "help");
155 //***************************************************************************************************************
157 ScreenSeqsCommand::~ScreenSeqsCommand(){ /* do nothing */ }
159 //***************************************************************************************************************
161 int ScreenSeqsCommand::execute(){
164 if (abort == true) { return 0; }
167 openInputFile(fastafile, inFASTA);
169 set<string> badSeqNames;
171 string goodSeqFile = outputDir + getRootName(getSimpleName(fastafile)) + "good" + getExtension(fastafile);
172 string badSeqFile = outputDir + getRootName(getSimpleName(fastafile)) + "bad" + getExtension(fastafile);
174 ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut);
175 ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut);
177 while(!inFASTA.eof()){
178 if (m->control_pressed) { goodSeqOut.close(); badSeqOut.close(); inFASTA.close(); remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
180 Sequence currSeq(inFASTA);
181 if (currSeq.getName() != "") {
182 bool goodSeq = 1; // innocent until proven guilty
183 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
184 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
185 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
186 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
187 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
188 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
191 currSeq.printSequence(goodSeqOut);
194 currSeq.printSequence(badSeqOut);
195 badSeqNames.insert(currSeq.getName());
201 if(namefile != "" && groupfile != "") {
202 screenNameGroupFile(badSeqNames);
203 if (m->control_pressed) { goodSeqOut.close(); badSeqOut.close(); inFASTA.close(); remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
204 }else if(namefile != "") {
205 screenNameGroupFile(badSeqNames);
206 if (m->control_pressed) { goodSeqOut.close(); badSeqOut.close(); inFASTA.close(); remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
207 }else if(groupfile != "") { screenGroupFile(badSeqNames); } // this screens just the group
209 if (m->control_pressed) { goodSeqOut.close(); badSeqOut.close(); inFASTA.close(); remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
211 if(alignreport != "") { screenAlignReport(badSeqNames); }
218 if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
220 m->mothurOutEndLine();
221 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
222 m->mothurOut(goodSeqFile); m->mothurOutEndLine();
223 m->mothurOut(badSeqFile); m->mothurOutEndLine();
224 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
225 m->mothurOutEndLine();
230 catch(exception& e) {
231 m->errorOut(e, "ScreenSeqsCommand", "execute");
236 //***************************************************************************************************************
238 int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
241 openInputFile(namefile, inputNames);
242 set<string> badSeqGroups;
243 string seqName, seqList, group;
244 set<string>::iterator it;
246 string goodNameFile = outputDir + getRootName(getSimpleName(namefile)) + "good" + getExtension(namefile);
247 string badNameFile = outputDir + getRootName(getSimpleName(namefile)) + "bad" + getExtension(namefile);
249 outputNames.push_back(goodNameFile); outputNames.push_back(badNameFile);
251 ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut);
252 ofstream badNameOut; openOutputFile(badNameFile, badNameOut);
254 while(!inputNames.eof()){
255 if (m->control_pressed) { goodNameOut.close(); badNameOut.close(); inputNames.close(); remove(goodNameFile.c_str()); remove(badNameFile.c_str()); return 0; }
257 inputNames >> seqName >> seqList;
258 it = badSeqNames.find(seqName);
260 if(it != badSeqNames.end()){
261 badSeqNames.erase(it);
262 badNameOut << seqName << '\t' << seqList << endl;
265 for(int i=0;i<seqList.length();i++){
266 if(seqList[i] == ','){
267 badSeqGroups.insert(seqList.substr(start,i-start));
271 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
275 goodNameOut << seqName << '\t' << seqList << endl;
283 //we were unable to remove some of the bad sequences
284 if (badSeqNames.size() != 0) {
285 for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
286 m->mothurOut("Your namefile does not include the sequence " + *it + " please correct.");
287 m->mothurOutEndLine();
293 ifstream inputGroups;
294 openInputFile(groupfile, inputGroups);
296 string goodGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "good" + getExtension(groupfile);
297 string badGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "bad" + getExtension(groupfile);
299 outputNames.push_back(goodGroupFile); outputNames.push_back(badGroupFile);
301 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
302 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
304 while(!inputGroups.eof()){
305 if (m->control_pressed) { goodGroupOut.close(); badGroupOut.close(); inputGroups.close(); remove(goodNameFile.c_str()); remove(badNameFile.c_str()); remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); return 0; }
307 inputGroups >> seqName >> group;
309 it = badSeqGroups.find(seqName);
311 if(it != badSeqGroups.end()){
312 badSeqGroups.erase(it);
313 badGroupOut << seqName << '\t' << group << endl;
316 goodGroupOut << seqName << '\t' << group << endl;
321 goodGroupOut.close();
324 //we were unable to remove some of the bad sequences
325 if (badSeqGroups.size() != 0) {
326 for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) {
327 m->mothurOut("Your namefile does not include the sequence " + *it + " please correct.");
328 m->mothurOutEndLine();
337 //***************************************************************************************************************
339 int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
341 ifstream inputGroups;
342 openInputFile(groupfile, inputGroups);
343 string seqName, group;
344 set<string>::iterator it;
346 string goodGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "good" + getExtension(groupfile);
347 string badGroupFile = outputDir + getRootName(getSimpleName(groupfile)) + "bad" + getExtension(groupfile);
349 outputNames.push_back(goodGroupFile); outputNames.push_back(badGroupFile);
351 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
352 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
354 while(!inputGroups.eof()){
355 if (m->control_pressed) { goodGroupOut.close(); badGroupOut.close(); inputGroups.close(); remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); return 0; }
357 inputGroups >> seqName >> group;
358 it = badSeqNames.find(seqName);
360 if(it != badSeqNames.end()){
361 badSeqNames.erase(it);
362 badGroupOut << seqName << '\t' << group << endl;
365 goodGroupOut << seqName << '\t' << group << endl;
370 if (m->control_pressed) { goodGroupOut.close(); badGroupOut.close(); inputGroups.close(); remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); return 0; }
372 //we were unable to remove some of the bad sequences
373 if (badSeqNames.size() != 0) {
374 for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
375 m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct.");
376 m->mothurOutEndLine();
381 goodGroupOut.close();
384 if (m->control_pressed) { remove(goodGroupFile.c_str()); remove(badGroupFile.c_str()); }
391 //***************************************************************************************************************
393 int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
395 ifstream inputAlignReport;
396 openInputFile(alignreport, inputAlignReport);
397 string seqName, group;
398 set<string>::iterator it;
400 string goodAlignReportFile = outputDir + getRootName(getSimpleName(alignreport)) + "good" + getExtension(alignreport);
401 string badAlignReportFile = outputDir + getRootName(getSimpleName(alignreport)) + "bad" + getExtension(alignreport);
403 outputNames.push_back(goodAlignReportFile); outputNames.push_back(badAlignReportFile);
405 ofstream goodAlignReportOut; openOutputFile(goodAlignReportFile, goodAlignReportOut);
406 ofstream badAlignReportOut; openOutputFile(badAlignReportFile, badAlignReportOut);
408 while (!inputAlignReport.eof()) { // need to copy header
409 char c = inputAlignReport.get();
410 goodAlignReportOut << c;
411 badAlignReportOut << c;
412 if (c == 10 || c == 13){ break; }
415 while(!inputAlignReport.eof()){
416 if (m->control_pressed) { goodAlignReportOut.close(); badAlignReportOut.close(); inputAlignReport.close(); remove(goodAlignReportFile.c_str()); remove(badAlignReportFile.c_str()); return 0; }
418 inputAlignReport >> seqName;
419 it = badSeqNames.find(seqName);
421 while (!inputAlignReport.eof()) { // need to copy header
422 char c = inputAlignReport.get();
424 if (c == 10 || c == 13){ break; }
427 if(it != badSeqNames.end()){
428 badSeqNames.erase(it);
429 badAlignReportOut << seqName << '\t' << line;;
432 goodAlignReportOut << seqName << '\t' << line;
434 gobble(inputAlignReport);
437 if (m->control_pressed) { goodAlignReportOut.close(); badAlignReportOut.close(); inputAlignReport.close(); remove(goodAlignReportFile.c_str()); remove(badAlignReportFile.c_str()); return 0; }
439 //we were unable to remove some of the bad sequences
440 if (badSeqNames.size() != 0) {
441 for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
442 m->mothurOut("Your file does not include the sequence " + *it + " please correct.");
443 m->mothurOutEndLine();
447 inputAlignReport.close();
448 goodAlignReportOut.close();
449 badAlignReportOut.close();
451 if (m->control_pressed) { remove(goodAlignReportFile.c_str()); remove(badAlignReportFile.c_str()); return 0; }
458 //***************************************************************************************************************