5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "trimseqscommand.h"
12 //***************************************************************************************************************
14 TrimSeqsCommand::TrimSeqsCommand(){
17 globaldata = GlobalData::getInstance();
21 if(globaldata->getFastaFile() == ""){
22 cout << "you need to at least enter a fasta file name" << endl;
25 if(isTrue(globaldata->getFlip())) { flip = 1; }
26 if(globaldata->getOligosFile() != "") { oligos = 1; }
28 if(globaldata->getMaxAmbig() != "-1") { maxAmbig = atoi(globaldata->getMaxAmbig().c_str()); }
29 else { maxAmbig = -1; }
31 if(globaldata->getMaxHomoPolymer() != "-1") { maxHomoP = atoi(globaldata->getMaxHomoPolymer().c_str()); }
32 else { maxHomoP = 0; }
34 if(globaldata->getMinLength() != "-1") { minLength = atoi(globaldata->getMinLength().c_str()); }
35 else { minLength = 0; }
37 if(globaldata->getMaxLength() != "-1") { maxLength = atoi(globaldata->getMaxLength().c_str()); }
38 else { maxLength = 0; }
40 if(!flip && !oligos && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP ){ cout << "huh?" << endl; }
44 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
48 cout << "An unknown error has occurred in the TrimSeqsCommand class function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
53 //***************************************************************************************************************
55 TrimSeqsCommand::~TrimSeqsCommand(){ /* do nothing */ }
57 //***************************************************************************************************************
59 int TrimSeqsCommand::execute(){
64 openInputFile(globaldata->getFastaFile(), inFASTA);
67 string trimSeqFile = getRootName(globaldata->getFastaFile()) + "trim.fasta";
68 openOutputFile(trimSeqFile, outFASTA);
71 string groupFile = getRootName(globaldata->getFastaFile()) + "groups";
72 openOutputFile(groupFile, outGroups);
75 string scrapSeqFile = getRootName(globaldata->getFastaFile()) + "scrap.fasta";
76 openOutputFile(scrapSeqFile, scrapFASTA);
80 while(!inFASTA.eof()){
81 Sequence currSeq(inFASTA);
82 string origSeq = currSeq.getUnaligned();
84 string trashCode = "";
86 if(barcodes.size() != 0){
87 success = stripBarcode(currSeq, group);
88 if(!success){ trashCode += 'b'; }
91 success = stripForward(currSeq);
92 if(!success){ trashCode += 'f'; }
95 success = stripReverse(currSeq);
96 if(!success){ trashCode += 'r'; }
98 if(minLength > 0 || maxLength > 0){
99 success = cullLength(currSeq);
100 if(!success){ trashCode += 'l'; }
103 success = cullHomoP(currSeq);
104 if(!success){ trashCode += 'h'; }
107 success = cullAmbigs(currSeq);
108 if(!success){ trashCode += 'n'; }
111 if(flip){ currSeq.reverseComplement(); } // should go last
113 if(trashCode.length() == 0){
114 currSeq.printSequence(outFASTA);
115 outGroups << currSeq.getName() << '\t' << group << endl;
118 currSeq.setName(currSeq.getName() + '|' + trashCode);
119 currSeq.setUnaligned(origSeq);
120 currSeq.printSequence(scrapFASTA);
132 catch(exception& e) {
133 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
137 cout << "An unknown error has occurred in the TrimSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
142 //***************************************************************************************************************
144 void TrimSeqsCommand::getOligos(){
147 openInputFile(globaldata->getOligosFile(), inOligos);
149 string type, oligo, group;
151 while(!inOligos.eof()){
154 if(type == "forward"){
156 forPrimer.push_back(oligo);
158 else if(type == "reverse"){
160 revPrimer.push_back(oligo);
162 else if(type == "barcode"){
163 inOligos >> oligo >> group;
164 barcodes[oligo]=group;
166 else if(type[0] == '#'){
168 while ((c = inOligos.get()) != EOF) { if (c == 10){ break; } } // get rest of line
174 numFPrimers = forPrimer.size();
175 numRPrimers = revPrimer.size();
178 //***************************************************************************************************************
180 bool TrimSeqsCommand::stripBarcode(Sequence& seq, string& group){
182 string rawSequence = seq.getUnaligned();
183 bool success = 0; //guilty until proven innocent
185 for(map<string,string>::iterator it=barcodes.begin();it!=barcodes.end();it++){
186 string oligo = it->first;
188 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
193 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
195 seq.setUnaligned(rawSequence.substr(oligo.length()));
204 //***************************************************************************************************************
206 bool TrimSeqsCommand::stripForward(Sequence& seq){
208 string rawSequence = seq.getUnaligned();
209 bool success = 0; //guilty until proven innocent
211 for(int i=0;i<numFPrimers;i++){
212 string oligo = forPrimer[i];
214 if(rawSequence.length() < oligo.length()){
219 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
220 seq.setUnaligned(rawSequence.substr(oligo.length()));
230 //***************************************************************************************************************
232 bool TrimSeqsCommand::stripReverse(Sequence& seq){
234 string rawSequence = seq.getUnaligned();
235 bool success = 0; //guilty until proven innocent
237 for(int i=0;i<numRPrimers;i++){
238 string oligo = revPrimer[i];
240 if(rawSequence.length() < oligo.length()){
245 if(rawSequence.compare(rawSequence.length()-oligo.length(),oligo.length(),oligo) == 0){
246 seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length()));
255 //***************************************************************************************************************
257 bool TrimSeqsCommand::cullLength(Sequence& seq){
259 int length = seq.getNumBases();
260 bool success = 0; //guilty until proven innocent
262 if(length >= minLength && maxLength == 0) { success = 1; }
263 else if(length >= minLength && length <= maxLength) { success = 1; }
264 else { success = 0; }
270 //***************************************************************************************************************
272 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
274 int longHomoP = seq.getLongHomoPolymer();
275 bool success = 0; //guilty until proven innocent
277 if(longHomoP <= maxHomoP){ success = 1; }
278 else { success = 0; }
284 //***************************************************************************************************************
286 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
288 int numNs = seq.getAmbigBases();
289 bool success = 0; //guilty until proven innocent
291 if(numNs <= maxAmbig){ success = 1; }
292 else { success = 0; }
298 //***************************************************************************************************************