2 * screenseqscommand.cpp
5 * Created by Pat Schloss on 6/3/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 ScreenSeqsCommand::ScreenSeqsCommand(string option){
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength",
25 "name", "group", "alignreport"};
26 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
28 OptionParser parser(option);
29 map<string,string> parameters = parser.getParameters();
31 ValidParameters validParameter;
33 //check to make sure all parameters are valid for command
34 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
35 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
38 //check for required parameters
39 fastafile = validParameter.validFile(parameters, "fasta", true);
40 if (fastafile == "not found") { cout << "fasta is a required parameter for the screen.seqs command." << endl; abort = true; }
41 else if (fastafile == "not open") { abort = true; }
43 groupfile = validParameter.validFile(parameters, "group", true);
44 if (groupfile == "not open") { abort = true; }
45 else if (groupfile == "not found") { groupfile = ""; }
47 namefile = validParameter.validFile(parameters, "name", true);
48 if (namefile == "not open") { abort = true; }
49 else if (namefile == "not found") { namefile = ""; }
51 alignreport = validParameter.validFile(parameters, "alignreport", true);
52 if (alignreport == "not open") { abort = true; }
53 else if (alignreport == "not found") { alignreport = ""; }
55 //check for optional parameter and set defaults
56 // ...at some point should added some additional type checking...
58 temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; }
59 convert(temp, startPos);
61 temp = validParameter.validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; }
62 convert(temp, endPos);
64 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
65 convert(temp, maxAmbig);
67 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "-1"; }
68 convert(temp, maxHomoP);
70 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "-1"; }
71 convert(temp, minLength);
73 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "-1"; }
74 convert(temp, maxLength);
79 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
83 cout << "An unknown error has occurred in the ScreenSeqsCommand class function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
87 //**********************************************************************************************************************
89 void ScreenSeqsCommand::help(){
91 cout << "The screen.seqs command reads a fastafile and creates ....." << "\n";
92 cout << "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, and group." << "\n";
93 cout << "The fasta parameter is required." << "\n";
94 cout << "The start parameter .... The default is -1." << "\n";
95 cout << "The end parameter .... The default is -1." << "\n";
96 cout << "The maxambig parameter .... The default is -1." << "\n";
97 cout << "The maxhomop parameter .... The default is -1." << "\n";
98 cout << "The minlength parameter .... The default is -1." << "\n";
99 cout << "The maxlength parameter .... The default is -1." << "\n";
100 cout << "The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile." << "\n";
101 cout << "The screen.seqs command should be in the following format: " << "\n";
102 cout << "screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig, " << "\n";
103 cout << "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) " << "\n";
104 cout << "Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...)." << "\n";
105 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n";
108 catch(exception& e) {
109 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
113 cout << "An unknown error has occurred in the ScreenSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
118 //***************************************************************************************************************
120 ScreenSeqsCommand::~ScreenSeqsCommand(){ /* do nothing */ }
122 //***************************************************************************************************************
124 int ScreenSeqsCommand::execute(){
127 if (abort == true) { return 0; }
130 openInputFile(fastafile, inFASTA);
132 set<string> badSeqNames;
134 string goodSeqFile = getRootName(fastafile) + "good" + getExtension(fastafile);
135 string badSeqFile = getRootName(fastafile) + "bad" + getExtension(fastafile);
137 ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut);
138 ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut);
140 while(!inFASTA.eof()){
141 Sequence currSeq(inFASTA);
142 bool goodSeq = 1; // innocent until proven guilty
143 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
144 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
145 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
146 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
147 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
148 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
151 currSeq.printSequence(goodSeqOut);
154 currSeq.printSequence(badSeqOut);
155 badSeqNames.insert(currSeq.getName());
159 if(namefile != "") { screenNameGroupFile(badSeqNames); }
160 if(groupfile != "") { screenGroupFile(badSeqNames); }
161 if(alignreport != "") { screenAlignReport(badSeqNames); }
168 catch(exception& e) {
169 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
173 cout << "An unknown error has occurred in the ScreenSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179 //***************************************************************************************************************
181 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
184 openInputFile(namefile, inputNames);
185 set<string> badSeqGroups;
186 string seqName, seqList, group;
187 set<string>::iterator it;
189 string goodNameFile = getRootName(namefile) + "good" + getExtension(namefile);
190 string badNameFile = getRootName(namefile) + "bad" + getExtension(namefile);
192 ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut);
193 ofstream badNameOut; openOutputFile(badNameFile, badNameOut);
195 while(!inputNames.eof()){
196 inputNames >> seqName >> seqList;
197 it = badSeqNames.find(seqName);
199 if(it != badSeqNames.end()){
200 badSeqNames.erase(it);
201 badNameOut << seqName << '\t' << seqList << endl;
204 for(int i=0;i<seqList.length();i++){
205 if(seqList[i] == ','){
206 badSeqGroups.insert(seqList.substr(start,i-start));
210 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
214 goodNameOut << seqName << '\t' << seqList << endl;
224 ifstream inputGroups;
225 openInputFile(groupfile, inputGroups);
227 string goodGroupFile = getRootName(groupfile) + "good" + getExtension(groupfile);
228 string badGroupFile = getRootName(groupfile) + "bad" + getExtension(groupfile);
230 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
231 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
233 while(!inputGroups.eof()){
234 inputGroups >> seqName >> group;
236 it = badSeqGroups.find(seqName);
238 if(it != badSeqGroups.end()){
239 badSeqGroups.erase(it);
240 badGroupOut << seqName << '\t' << group << endl;
243 goodGroupOut << seqName << '\t' << group << endl;
248 goodGroupOut.close();
253 //***************************************************************************************************************
255 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
257 ifstream inputGroups;
258 openInputFile(groupfile, inputGroups);
259 string seqName, group;
260 set<string>::iterator it;
262 string goodGroupFile = getRootName(groupfile) + "good" + getExtension(groupfile);
263 string badGroupFile = getRootName(groupfile) + "bad" + getExtension(groupfile);
265 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
266 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
268 while(!inputGroups.eof()){
269 inputGroups >> seqName >> group;
270 it = badSeqNames.find(seqName);
272 if(it != badSeqNames.end()){
273 badSeqNames.erase(it);
274 badGroupOut << seqName << '\t' << group << endl;
277 goodGroupOut << seqName << '\t' << group << endl;
282 goodGroupOut.close();
287 //***************************************************************************************************************
289 void ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
291 ifstream inputAlignReport;
292 openInputFile(alignreport, inputAlignReport);
293 string seqName, group;
294 set<string>::iterator it;
296 string goodAlignReportFile = getRootName(alignreport) + "good" + getExtension(alignreport);
297 string badAlignReportFile = getRootName(alignreport) + "bad" + getExtension(alignreport);
299 ofstream goodAlignReportOut; openOutputFile(goodAlignReportFile, goodAlignReportOut);
300 ofstream badAlignReportOut; openOutputFile(badAlignReportFile, badAlignReportOut);
302 while (!inputAlignReport.eof()) { // need to copy header
303 char c = inputAlignReport.get();
304 goodAlignReportOut << c;
305 badAlignReportOut << c;
306 if (c == 10 || c == 13){ break; }
309 while(!inputAlignReport.eof()){
310 inputAlignReport >> seqName;
311 it = badSeqNames.find(seqName);
313 while (!inputAlignReport.eof()) { // need to copy header
314 char c = inputAlignReport.get();
316 if (c == 10 || c == 13){ break; }
319 if(it != badSeqNames.end()){
320 badSeqNames.erase(it);
321 badAlignReportOut << seqName << '\t' << line;;
324 goodAlignReportOut << seqName << '\t' << line;
326 gobble(inputAlignReport);
328 inputAlignReport.close();
329 goodAlignReportOut.close();
330 badAlignReportOut.close();
334 //***************************************************************************************************************