2 * screenseqscommand.cpp
5 * Created by Pat Schloss on 6/3/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 ScreenSeqsCommand::ScreenSeqsCommand(string option){
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength",
25 "name", "group", "alignreport"};
26 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
28 OptionParser parser(option);
29 map<string,string> parameters = parser.getParameters();
31 ValidParameters validParameter;
33 //check to make sure all parameters are valid for command
34 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
35 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
38 //check for required parameters
39 fastafile = validParameter.validFile(parameters, "fasta", true);
40 if (fastafile == "not found") { mothurOut("fasta is a required parameter for the screen.seqs command."); mothurOutEndLine(); abort = true; }
41 else if (fastafile == "not open") { abort = true; }
43 groupfile = validParameter.validFile(parameters, "group", true);
44 if (groupfile == "not open") { abort = true; }
45 else if (groupfile == "not found") { groupfile = ""; }
47 namefile = validParameter.validFile(parameters, "name", true);
48 if (namefile == "not open") { abort = true; }
49 else if (namefile == "not found") { namefile = ""; }
51 alignreport = validParameter.validFile(parameters, "alignreport", true);
52 if (alignreport == "not open") { abort = true; }
53 else if (alignreport == "not found") { alignreport = ""; }
55 //check for optional parameter and set defaults
56 // ...at some point should added some additional type checking...
58 temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; }
59 convert(temp, startPos);
61 temp = validParameter.validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; }
62 convert(temp, endPos);
64 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
65 convert(temp, maxAmbig);
67 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "-1"; }
68 convert(temp, maxHomoP);
70 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "-1"; }
71 convert(temp, minLength);
73 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "-1"; }
74 convert(temp, maxLength);
79 errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
83 //**********************************************************************************************************************
85 void ScreenSeqsCommand::help(){
87 mothurOut("The screen.seqs command reads a fastafile and creates .....\n");
88 mothurOut("The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, and group.\n");
89 mothurOut("The fasta parameter is required.\n");
90 mothurOut("The start parameter .... The default is -1.\n");
91 mothurOut("The end parameter .... The default is -1.\n");
92 mothurOut("The maxambig parameter .... The default is -1.\n");
93 mothurOut("The maxhomop parameter .... The default is -1.\n");
94 mothurOut("The minlength parameter .... The default is -1.\n");
95 mothurOut("The maxlength parameter .... The default is -1.\n");
96 mothurOut("The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile.\n");
97 mothurOut("The screen.seqs command should be in the following format: \n");
98 mothurOut("screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig, \n");
99 mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) \n");
100 mothurOut("Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
101 mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
104 catch(exception& e) {
105 errorOut(e, "ScreenSeqsCommand", "help");
110 //***************************************************************************************************************
112 ScreenSeqsCommand::~ScreenSeqsCommand(){ /* do nothing */ }
114 //***************************************************************************************************************
116 int ScreenSeqsCommand::execute(){
119 if (abort == true) { return 0; }
122 openInputFile(fastafile, inFASTA);
124 set<string> badSeqNames;
126 string goodSeqFile = getRootName(fastafile) + "good" + getExtension(fastafile);
127 string badSeqFile = getRootName(fastafile) + "bad" + getExtension(fastafile);
129 ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut);
130 ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut);
132 while(!inFASTA.eof()){
133 Sequence currSeq(inFASTA);
134 bool goodSeq = 1; // innocent until proven guilty
135 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
136 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
137 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
138 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
139 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
140 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
143 currSeq.printSequence(goodSeqOut);
146 currSeq.printSequence(badSeqOut);
147 badSeqNames.insert(currSeq.getName());
151 if(namefile != "" && groupfile != "") { screenNameGroupFile(badSeqNames); } // this screens both names and groups
152 else if(groupfile != "") { screenGroupFile(badSeqNames); } // this screens just the groups
153 if(alignreport != "") { screenAlignReport(badSeqNames); }
160 catch(exception& e) {
161 errorOut(e, "ScreenSeqsCommand", "execute");
166 //***************************************************************************************************************
168 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
171 openInputFile(namefile, inputNames);
172 set<string> badSeqGroups;
173 string seqName, seqList, group;
174 set<string>::iterator it;
176 string goodNameFile = getRootName(namefile) + "good" + getExtension(namefile);
177 string badNameFile = getRootName(namefile) + "bad" + getExtension(namefile);
179 ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut);
180 ofstream badNameOut; openOutputFile(badNameFile, badNameOut);
182 while(!inputNames.eof()){
183 inputNames >> seqName >> seqList;
184 it = badSeqNames.find(seqName);
186 if(it != badSeqNames.end()){
187 badSeqNames.erase(it);
188 badNameOut << seqName << '\t' << seqList << endl;
191 for(int i=0;i<seqList.length();i++){
192 if(seqList[i] == ','){
193 badSeqGroups.insert(seqList.substr(start,i-start));
197 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
201 goodNameOut << seqName << '\t' << seqList << endl;
211 ifstream inputGroups;
212 openInputFile(groupfile, inputGroups);
214 string goodGroupFile = getRootName(groupfile) + "good" + getExtension(groupfile);
215 string badGroupFile = getRootName(groupfile) + "bad" + getExtension(groupfile);
217 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
218 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
220 while(!inputGroups.eof()){
221 inputGroups >> seqName >> group;
223 it = badSeqGroups.find(seqName);
225 if(it != badSeqGroups.end()){
226 badSeqGroups.erase(it);
227 badGroupOut << seqName << '\t' << group << endl;
230 goodGroupOut << seqName << '\t' << group << endl;
235 goodGroupOut.close();
240 //***************************************************************************************************************
242 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
244 ifstream inputGroups;
245 openInputFile(groupfile, inputGroups);
246 string seqName, group;
247 set<string>::iterator it;
249 string goodGroupFile = getRootName(groupfile) + "good" + getExtension(groupfile);
250 string badGroupFile = getRootName(groupfile) + "bad" + getExtension(groupfile);
252 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
253 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
255 while(!inputGroups.eof()){
256 inputGroups >> seqName >> group;
257 it = badSeqNames.find(seqName);
259 if(it != badSeqNames.end()){
260 badSeqNames.erase(it);
261 badGroupOut << seqName << '\t' << group << endl;
264 goodGroupOut << seqName << '\t' << group << endl;
269 goodGroupOut.close();
274 //***************************************************************************************************************
276 void ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
278 ifstream inputAlignReport;
279 openInputFile(alignreport, inputAlignReport);
280 string seqName, group;
281 set<string>::iterator it;
283 string goodAlignReportFile = getRootName(alignreport) + "good" + getExtension(alignreport);
284 string badAlignReportFile = getRootName(alignreport) + "bad" + getExtension(alignreport);
286 ofstream goodAlignReportOut; openOutputFile(goodAlignReportFile, goodAlignReportOut);
287 ofstream badAlignReportOut; openOutputFile(badAlignReportFile, badAlignReportOut);
289 while (!inputAlignReport.eof()) { // need to copy header
290 char c = inputAlignReport.get();
291 goodAlignReportOut << c;
292 badAlignReportOut << c;
293 if (c == 10 || c == 13){ break; }
296 while(!inputAlignReport.eof()){
297 inputAlignReport >> seqName;
298 it = badSeqNames.find(seqName);
300 while (!inputAlignReport.eof()) { // need to copy header
301 char c = inputAlignReport.get();
303 if (c == 10 || c == 13){ break; }
306 if(it != badSeqNames.end()){
307 badSeqNames.erase(it);
308 badAlignReportOut << seqName << '\t' << line;;
311 goodAlignReportOut << seqName << '\t' << line;
313 gobble(inputAlignReport);
315 inputAlignReport.close();
316 goodAlignReportOut.close();
317 badAlignReportOut.close();
321 //***************************************************************************************************************