5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
12 //***************************************************************************************************************
14 TrimSeqsCommand::TrimSeqsCommand(string option){
17 globaldata = GlobalData::getInstance();
20 //allow user to run help
21 if(option == "help") { help(); abort = true; }
24 //valid paramters for this command
25 string AlignArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength"};
26 vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
28 parser = new OptionParser();
29 parser->parse(option, parameters); delete parser;
31 ValidParameters* validParameter = new ValidParameters();
33 //check to make sure all parameters are valid for command
34 for (it = parameters.begin(); it != parameters.end(); it++) {
35 if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
38 //check for required parameters
39 fastafile = validParameter->validFile(parameters, "fasta", true);
40 if (fastafile == "not found") { cout << "fasta is a required parameter for the screen.seqs command." << endl; abort = true; }
41 else if (fastafile == "not open") { abort = true; }
42 else { globaldata->setFastaFile(fastafile); }
45 //check for optional parameter and set defaults
46 // ...at some point should added some additional type checking...
48 temp = validParameter->validFile(parameters, "flip", false); if (temp == "not found") { temp = "0"; }
49 if(isTrue(temp)) { flip = 1; }
51 temp = validParameter->validFile(parameters, "oligos", false); if (temp == "not found") { temp = ""; }
52 if(temp != "") { oligos = 1; }
55 temp = validParameter->validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
56 convert(temp, maxAmbig);
58 temp = validParameter->validFile(parameters, "maxhomop", false); if (temp == "not found") { temp = "0"; }
59 convert(temp, maxHomoP);
61 temp = validParameter->validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
62 convert(temp, minLength);
64 temp = validParameter->validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
65 convert(temp, maxLength);
67 if(!flip && !oligos && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP ){ cout << "huh?" << endl; }
69 delete validParameter;
74 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
78 cout << "An unknown error has occurred in the TrimSeqsCommand class function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
82 //**********************************************************************************************************************
84 void TrimSeqsCommand::help(){
86 cout << "The trim.seqs command reads a fastafile and creates ....." << "\n";
87 cout << "The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength and maxlength." << "\n";
88 cout << "The fasta parameter is required." << "\n";
89 cout << "The flip parameter .... The default is 0." << "\n";
90 cout << "The oligos parameter .... The default is ""." << "\n";
91 cout << "The maxambig parameter .... The default is -1." << "\n";
92 cout << "The maxhomop parameter .... The default is 0." << "\n";
93 cout << "The minlength parameter .... The default is 0." << "\n";
94 cout << "The maxlength parameter .... The default is 0." << "\n";
95 cout << "The trim.seqs command should be in the following format: " << "\n";
96 cout << "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig, " << "\n";
97 cout << "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength) " << "\n";
98 cout << "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...)." << "\n";
99 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n";
102 catch(exception& e) {
103 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
107 cout << "An unknown error has occurred in the TrimSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
113 //***************************************************************************************************************
115 TrimSeqsCommand::~TrimSeqsCommand(){ /* do nothing */ }
117 //***************************************************************************************************************
119 int TrimSeqsCommand::execute(){
122 if (abort == true) { return 0; }
127 openInputFile(fastafile, inFASTA);
130 string trimSeqFile = getRootName(fastafile) + "trim.fasta";
131 openOutputFile(trimSeqFile, outFASTA);
134 string groupFile = getRootName(fastafile) + "groups";
135 openOutputFile(groupFile, outGroups);
138 string scrapSeqFile = getRootName(fastafile) + "scrap.fasta";
139 openOutputFile(scrapSeqFile, scrapFASTA);
143 while(!inFASTA.eof()){
144 Sequence currSeq(inFASTA);
145 string origSeq = currSeq.getUnaligned();
147 string trashCode = "";
149 if(barcodes.size() != 0){
150 success = stripBarcode(currSeq, group);
151 if(!success){ trashCode += 'b'; }
153 if(numFPrimers != 0){
154 success = stripForward(currSeq);
155 if(!success){ trashCode += 'f'; }
157 if(numRPrimers != 0){
158 success = stripReverse(currSeq);
159 if(!success){ trashCode += 'r'; }
161 if(minLength > 0 || maxLength > 0){
162 success = cullLength(currSeq);
163 if(!success){ trashCode += 'l'; }
166 success = cullHomoP(currSeq);
167 if(!success){ trashCode += 'h'; }
170 success = cullAmbigs(currSeq);
171 if(!success){ trashCode += 'n'; }
174 if(flip){ currSeq.reverseComplement(); } // should go last
176 if(trashCode.length() == 0){
177 currSeq.printSequence(outFASTA);
178 outGroups << currSeq.getName() << '\t' << group << endl;
181 currSeq.setName(currSeq.getName() + '|' + trashCode);
182 currSeq.setUnaligned(origSeq);
183 currSeq.printSequence(scrapFASTA);
195 catch(exception& e) {
196 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
200 cout << "An unknown error has occurred in the TrimSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
205 //***************************************************************************************************************
207 void TrimSeqsCommand::getOligos(){
210 //openInputFile(globaldata->getOligosFile(), inOligos);
212 string type, oligo, group;
214 while(!inOligos.eof()){
217 if(type == "forward"){
219 forPrimer.push_back(oligo);
221 else if(type == "reverse"){
223 revPrimer.push_back(oligo);
225 else if(type == "barcode"){
226 inOligos >> oligo >> group;
227 barcodes[oligo]=group;
229 else if(type[0] == '#'){
231 while ((c = inOligos.get()) != EOF) { if (c == 10){ break; } } // get rest of line
237 numFPrimers = forPrimer.size();
238 numRPrimers = revPrimer.size();
241 //***************************************************************************************************************
243 bool TrimSeqsCommand::stripBarcode(Sequence& seq, string& group){
245 string rawSequence = seq.getUnaligned();
246 bool success = 0; //guilty until proven innocent
248 for(map<string,string>::iterator it=barcodes.begin();it!=barcodes.end();it++){
249 string oligo = it->first;
251 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
256 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
258 seq.setUnaligned(rawSequence.substr(oligo.length()));
267 //***************************************************************************************************************
269 bool TrimSeqsCommand::stripForward(Sequence& seq){
271 string rawSequence = seq.getUnaligned();
272 bool success = 0; //guilty until proven innocent
274 for(int i=0;i<numFPrimers;i++){
275 string oligo = forPrimer[i];
277 if(rawSequence.length() < oligo.length()){
282 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
283 seq.setUnaligned(rawSequence.substr(oligo.length()));
293 //***************************************************************************************************************
295 bool TrimSeqsCommand::stripReverse(Sequence& seq){
297 string rawSequence = seq.getUnaligned();
298 bool success = 0; //guilty until proven innocent
300 for(int i=0;i<numRPrimers;i++){
301 string oligo = revPrimer[i];
303 if(rawSequence.length() < oligo.length()){
308 if(rawSequence.compare(rawSequence.length()-oligo.length(),oligo.length(),oligo) == 0){
309 seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length()));
318 //***************************************************************************************************************
320 bool TrimSeqsCommand::cullLength(Sequence& seq){
322 int length = seq.getNumBases();
323 bool success = 0; //guilty until proven innocent
325 if(length >= minLength && maxLength == 0) { success = 1; }
326 else if(length >= minLength && length <= maxLength) { success = 1; }
327 else { success = 0; }
333 //***************************************************************************************************************
335 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
337 int longHomoP = seq.getLongHomoPolymer();
338 bool success = 0; //guilty until proven innocent
340 if(longHomoP <= maxHomoP){ success = 1; }
341 else { success = 0; }
347 //***************************************************************************************************************
349 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
351 int numNs = seq.getAmbigBases();
352 bool success = 0; //guilty until proven innocent
354 if(numNs <= maxAmbig){ success = 1; }
355 else { success = 0; }
361 //***************************************************************************************************************