2 * screenseqscommand.cpp
5 * Created by Pat Schloss on 6/3/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
13 //***************************************************************************************************************
15 ScreenSeqsCommand::ScreenSeqsCommand(string option){
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string AlignArray[] = {"fasta", "start", "end", "maxambig", "maxhomop", "minlength", "maxlength", "name", "group"};
25 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
27 OptionParser parser(option);
28 map<string,string> parameters = parser.getParameters();
30 ValidParameters validParameter;
32 //check to make sure all parameters are valid for command
33 for (map<string,string>::iterator it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //check for required parameters
38 fastafile = validParameter.validFile(parameters, "fasta", true);
39 if (fastafile == "not found") { cout << "fasta is a required parameter for the screen.seqs command." << endl; abort = true; }
40 else if (fastafile == "not open") { abort = true; }
42 groupfile = validParameter.validFile(parameters, "group", true);
43 if (groupfile == "not open") { abort = true; }
44 else if (groupfile == "not found") { groupfile = ""; }
46 namefile = validParameter.validFile(parameters, "name", true);
47 if (namefile == "not open") { abort = true; }
48 else if (namefile == "not found") { namefile = ""; }
51 //check for optional parameter and set defaults
52 // ...at some point should added some additional type checking...
54 temp = validParameter.validFile(parameters, "start", false); if (temp == "not found") { temp = "-1"; }
55 convert(temp, startPos);
57 temp = validParameter.validFile(parameters, "end", false); if (temp == "not found") { temp = "-1"; }
58 convert(temp, endPos);
60 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
61 convert(temp, maxAmbig);
63 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "-1"; }
64 convert(temp, maxHomoP);
66 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "-1"; }
67 convert(temp, minLength);
69 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "-1"; }
70 convert(temp, maxLength);
75 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
79 cout << "An unknown error has occurred in the ScreenSeqsCommand class function ScreenSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
83 //**********************************************************************************************************************
85 void ScreenSeqsCommand::help(){
87 cout << "The screen.seqs command reads a fastafile and creates ....." << "\n";
88 cout << "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, and group." << "\n";
89 cout << "The fasta parameter is required." << "\n";
90 cout << "The start parameter .... The default is -1." << "\n";
91 cout << "The end parameter .... The default is -1." << "\n";
92 cout << "The maxambig parameter .... The default is -1." << "\n";
93 cout << "The maxhomop parameter .... The default is -1." << "\n";
94 cout << "The minlength parameter .... The default is -1." << "\n";
95 cout << "The maxlength parameter .... The default is -1." << "\n";
96 cout << "The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile." << "\n";
97 cout << "The screen.seqs command should be in the following format: " << "\n";
98 cout << "screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig, " << "\n";
99 cout << "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) " << "\n";
100 cout << "Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...)." << "\n";
101 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n";
104 catch(exception& e) {
105 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
109 cout << "An unknown error has occurred in the ScreenSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
114 //***************************************************************************************************************
116 ScreenSeqsCommand::~ScreenSeqsCommand(){ /* do nothing */ }
118 //***************************************************************************************************************
120 int ScreenSeqsCommand::execute(){
123 if (abort == true) { return 0; }
126 openInputFile(fastafile, inFASTA);
128 set<string> badSeqNames;
130 string goodSeqFile = getRootName(fastafile) + "good" + getExtension(fastafile);
131 string badSeqFile = getRootName(fastafile) + "bad" + getExtension(fastafile);
133 ofstream goodSeqOut; openOutputFile(goodSeqFile, goodSeqOut);
134 ofstream badSeqOut; openOutputFile(badSeqFile, badSeqOut);
136 while(!inFASTA.eof()){
137 Sequence currSeq(inFASTA);
138 bool goodSeq = 1; // innocent until proven guilty
139 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
140 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
141 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
142 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
143 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
144 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
147 currSeq.printSequence(goodSeqOut);
150 currSeq.printSequence(badSeqOut);
151 badSeqNames.insert(currSeq.getName());
156 screenNameGroupFile(badSeqNames);
158 else if(groupfile != ""){
159 screenGroupFile(badSeqNames);
164 catch(exception& e) {
165 cout << "Standard Error: " << e.what() << " has occurred in the ScreenSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
169 cout << "An unknown error has occurred in the ScreenSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
175 //***************************************************************************************************************
177 void ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
180 openInputFile(namefile, inputNames);
181 set<string> badSeqGroups;
182 string seqName, seqList, group;
183 set<string>::iterator it;
185 string goodNameFile = getRootName(namefile) + "good" + getExtension(namefile);
186 string badNameFile = getRootName(namefile) + "bad" + getExtension(namefile);
188 ofstream goodNameOut; openOutputFile(goodNameFile, goodNameOut);
189 ofstream badNameOut; openOutputFile(badNameFile, badNameOut);
191 while(!inputNames.eof()){
192 inputNames >> seqName >> seqList;
193 it = badSeqNames.find(seqName);
195 if(it != badSeqNames.end()){
196 badSeqNames.erase(it);
197 badNameOut << seqName << '\t' << seqList << endl;
200 for(int i=0;i<seqList.length();i++){
201 if(seqList[i] == ','){
202 badSeqGroups.insert(seqList.substr(start,i-start));
206 badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
210 goodNameOut << seqName << '\t' << seqList << endl;
220 ifstream inputGroups;
221 openInputFile(groupfile, inputGroups);
223 string goodGroupFile = getRootName(groupfile) + "good" + getExtension(groupfile);
224 string badGroupFile = getRootName(groupfile) + "bad" + getExtension(groupfile);
226 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
227 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
229 while(!inputGroups.eof()){
230 inputGroups >> seqName >> group;
232 it = badSeqGroups.find(seqName);
234 if(it != badSeqGroups.end()){
235 badSeqGroups.erase(it);
236 badGroupOut << seqName << '\t' << group << endl;
239 goodGroupOut << seqName << '\t' << group << endl;
244 goodGroupOut.close();
249 //***************************************************************************************************************
251 void ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
253 ifstream inputGroups;
254 openInputFile(groupfile, inputGroups);
255 string seqName, group;
256 set<string>::iterator it;
258 string goodGroupFile = getRootName(groupfile) + "good" + getExtension(groupfile);
259 string badGroupFile = getRootName(groupfile) + "bad" + getExtension(groupfile);
261 ofstream goodGroupOut; openOutputFile(goodGroupFile, goodGroupOut);
262 ofstream badGroupOut; openOutputFile(badGroupFile, badGroupOut);
264 while(!inputGroups.eof()){
265 inputGroups >> seqName >> group;
266 it = badSeqNames.find(seqName);
268 if(it != badSeqNames.end()){
269 badSeqNames.erase(it);
270 badGroupOut << seqName << '\t' << group << endl;
273 goodGroupOut << seqName << '\t' << group << endl;
278 goodGroupOut.close();
283 //***************************************************************************************************************