5 * Created by Pat Schloss on 6/6/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "sequence.hpp"
11 #include "trimseqscommand.h"
13 //***************************************************************************************************************
15 TrimSeqsCommand::TrimSeqsCommand(){
19 forwardPrimerMismatch = 0;
20 reversePrimerMismatch = 0;
23 totalBarcodeCount = 0;
24 matchBarcodeCount = 0;
26 globaldata = GlobalData::getInstance();
27 if(globaldata->getFastaFile() == ""){
28 cout << "you need to at least enter a fasta file name" << endl;
31 if(isTrue(globaldata->getFlip())) { flip = 1; }
33 if(globaldata->getOligosFile() != ""){
35 forwardPrimerMismatch = atoi(globaldata->getForwardMismatch().c_str());
36 reversePrimerMismatch = atoi(globaldata->getReverseMismatch().c_str());
37 barcodeMismatch = atoi(globaldata->getBarcodeMismatch().c_str());
40 if(!flip && !oligos) { cout << "what was the point?" << endl; }
45 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49 cout << "An unknown error has occurred in the TrimSeqsCommand class function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
54 //***************************************************************************************************************
56 TrimSeqsCommand::~TrimSeqsCommand(){ /* do nothing */ }
58 //***************************************************************************************************************
60 int TrimSeqsCommand::execute(){
65 openInputFile(globaldata->getFastaFile(), inFASTA);
68 string trimSeqFile = getRootName(globaldata->getFastaFile()) + "trim.fasta";
69 openOutputFile(trimSeqFile, outFASTA);
72 string groupFile = getRootName(globaldata->getFastaFile()) + "groups";
73 openOutputFile(groupFile, outGroups);
76 string scrapSeqFile = getRootName(globaldata->getFastaFile()) + "scrap.fasta";
77 openOutputFile(scrapSeqFile, scrapFASTA);
81 while(!inFASTA.eof()){
82 Sequence currSeq(inFASTA);
84 string trashCode = "";
86 if(barcodes.size() != 0){
87 success = stripBarcode(currSeq, group);
88 if(!success){ trashCode += 'b'; }
91 success = stripForward(currSeq);
92 if(!success){ trashCode += 'f'; }
95 success = stripReverse(currSeq);
96 if(!success){ trashCode += 'r'; }
98 if(flip){ currSeq.reverseComplement(); } // should go last
100 if(trashCode.length() == 0){
101 currSeq.printSequence(outFASTA);
102 outGroups << currSeq.getName() << '\t' << group << endl;
105 currSeq.setName(currSeq.getName() + '|' + trashCode);
106 currSeq.printSequence(scrapFASTA);
118 catch(exception& e) {
119 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
123 cout << "An unknown error has occurred in the TrimSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
128 //***************************************************************************************************************
130 void TrimSeqsCommand::getOligos(){
133 openInputFile(globaldata->getOligosFile(), inOligos);
135 string type, oligo, group;
137 while(!inOligos.eof()){
140 if(type == "forward"){
142 forPrimer.push_back(oligo);
144 else if(type == "reverse"){
146 revPrimer.push_back(oligo);
148 else if(type == "barcode"){
149 inOligos >> oligo >> group;
150 barcodes[oligo]=group;
152 else if(type[0] == '#'){
154 while ((c = inOligos.get()) != EOF) { if (c == 10){ break; } } // get rest of line
160 numFPrimers = forPrimer.size();
161 numRPrimers = revPrimer.size();
164 //***************************************************************************************************************
166 bool TrimSeqsCommand::stripBarcode(Sequence& seq, string& group){
168 string rawSequence = seq.getUnaligned();
169 bool success = 0; //guilty until proven innocent
171 for(map<string,string>::iterator it=barcodes.begin();it!=barcodes.end();it++){
172 string oligo = it->first;
174 if(rawSequence.length() < oligo.length()){ //let's just assume that the barcodes are the same length
179 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
181 seq.setUnaligned(rawSequence.substr(oligo.length()));
192 //***************************************************************************************************************
194 bool TrimSeqsCommand::stripForward(Sequence& seq){
196 string rawSequence = seq.getUnaligned();
197 bool success = 0; //guilty until proven innocent
199 for(int i=0;i<numFPrimers;i++){
200 string oligo = forPrimer[i];
202 if(rawSequence.length() < oligo.length()){
207 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
208 seq.setUnaligned(rawSequence.substr(oligo.length()));
220 //***************************************************************************************************************
222 bool TrimSeqsCommand::stripReverse(Sequence& seq){
224 string rawSequence = seq.getUnaligned();
225 bool success = 0; //guilty until proven innocent
227 for(int i=0;i<numRPrimers;i++){
228 string oligo = revPrimer[i];
230 if(rawSequence.length() < oligo.length()){
235 if(rawSequence.compare(rawSequence.length()-oligo.length(),oligo.length(),oligo) == 0){
236 seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length()));
248 //***************************************************************************************************************