]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added trim.seqs command
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "sequence.hpp"
11 #include "trimseqscommand.h"
12
13 //***************************************************************************************************************
14
15 TrimSeqsCommand::TrimSeqsCommand(){
16         try {
17                 
18                 oligos = 0;
19                 forwardPrimerMismatch = 0;
20                 reversePrimerMismatch = 0;
21                 barcodeMismatch = 0;
22                 
23                 totalBarcodeCount = 0;
24                 matchBarcodeCount = 0;
25                 
26                 globaldata = GlobalData::getInstance();
27                 if(globaldata->getFastaFile() == ""){
28                         cout << "you need to at least enter a fasta file name" << endl;
29                 }
30                 
31                 if(isTrue(globaldata->getFlip()))       {       flip = 1;       }
32                 
33                 if(globaldata->getOligosFile() != ""){
34                         oligos = 1;
35                         forwardPrimerMismatch = atoi(globaldata->getForwardMismatch().c_str());
36                         reversePrimerMismatch = atoi(globaldata->getReverseMismatch().c_str());
37                         barcodeMismatch = atoi(globaldata->getBarcodeMismatch().c_str());
38                 }
39                 
40                 if(!flip && !oligos)    {       cout << "what was the point?" << endl;                                                  }
41
42                 
43         }
44         catch(exception& e) {
45                 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
46                 exit(1);
47         }
48         catch(...) {
49                 cout << "An unknown error has occurred in the TrimSeqsCommand class function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
50                 exit(1);
51         }       
52 }
53
54 //***************************************************************************************************************
55
56 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
57
58 //***************************************************************************************************************
59
60 int TrimSeqsCommand::execute(){
61         try{
62                 getOligos();
63                 
64                 ifstream inFASTA;
65                 openInputFile(globaldata->getFastaFile(), inFASTA);
66
67                 ofstream outFASTA;
68                 string trimSeqFile = getRootName(globaldata->getFastaFile()) + "trim.fasta";
69                 openOutputFile(trimSeqFile, outFASTA);
70                 
71                 ofstream outGroups;
72                 string groupFile = getRootName(globaldata->getFastaFile()) + "groups"; 
73                 openOutputFile(groupFile, outGroups);
74
75                 ofstream scrapFASTA;
76                 string scrapSeqFile = getRootName(globaldata->getFastaFile()) + "scrap.fasta";
77                 openOutputFile(scrapSeqFile, scrapFASTA);
78
79                 bool success;
80                 
81                 while(!inFASTA.eof()){
82                         Sequence currSeq(inFASTA);
83                         string group;
84                         string trashCode = "";
85
86                         if(barcodes.size() != 0){
87                                 success = stripBarcode(currSeq, group);
88                                 if(!success){   trashCode += 'b';       }
89                         }
90                         if(numFPrimers != 0){
91                                 success = stripForward(currSeq);
92                                 if(!success){   trashCode += 'f';       }
93                         }
94                         if(numRPrimers != 0){
95                                 success = stripReverse(currSeq);
96                                 if(!success){   trashCode += 'r';       }
97                         }
98                         if(flip){       currSeq.reverseComplement();    }               // should go last                       
99
100                         if(trashCode.length() == 0){
101                                 currSeq.printSequence(outFASTA);
102                                 outGroups << currSeq.getName() << '\t' << group << endl;
103                         }
104                         else{
105                                 currSeq.setName(currSeq.getName() + '|' + trashCode);
106                                 currSeq.printSequence(scrapFASTA);
107                         }
108                         
109                         gobble(inFASTA);
110                 }
111                 inFASTA.close();
112                 outFASTA.close();
113                 scrapFASTA.close();
114                 outGroups.close();
115                 
116                 return 0;               
117         }
118         catch(exception& e) {
119                 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
120                 exit(1);
121         }
122         catch(...) {
123                 cout << "An unknown error has occurred in the TrimSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
124                 exit(1);
125         }
126 }
127
128 //***************************************************************************************************************
129
130 void TrimSeqsCommand::getOligos(){
131
132         ifstream inOligos;
133         openInputFile(globaldata->getOligosFile(), inOligos);
134
135         string type, oligo, group;
136         
137         while(!inOligos.eof()){
138                 inOligos >> type;
139                 
140                 if(type == "forward"){
141                         inOligos >> oligo;
142                         forPrimer.push_back(oligo);
143                 }
144                 else if(type == "reverse"){
145                         inOligos >> oligo;
146                         revPrimer.push_back(oligo);
147                 }
148                 else if(type == "barcode"){
149                         inOligos >> oligo >> group;
150                         barcodes[oligo]=group;
151                 }
152                 else if(type[0] == '#'){
153                         char c;
154                         while ((c = inOligos.get()) != EOF)     {       if (c == 10){   break;  }       } // get rest of line
155                 }
156                 
157                 gobble(inOligos);
158         }
159
160         numFPrimers = forPrimer.size();
161         numRPrimers = revPrimer.size();
162 }
163
164 //***************************************************************************************************************
165
166 bool TrimSeqsCommand::stripBarcode(Sequence& seq, string& group){
167         
168         string rawSequence = seq.getUnaligned();
169         bool success = 0;       //guilty until proven innocent
170
171         for(map<string,string>::iterator it=barcodes.begin();it!=barcodes.end();it++){
172                 string oligo = it->first;
173                 
174                 if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
175                         success = 0;
176                         break;
177                 }
178                 
179                 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
180                         group = it->second;
181                         seq.setUnaligned(rawSequence.substr(oligo.length()));
182                         matchBarcodeCount++;
183                         success = 1;
184                         break;
185                 }
186         }
187         totalBarcodeCount++;
188         return success;
189         
190 }
191
192 //***************************************************************************************************************
193
194 bool TrimSeqsCommand::stripForward(Sequence& seq){
195         
196         string rawSequence = seq.getUnaligned();
197         bool success = 0;       //guilty until proven innocent
198         
199         for(int i=0;i<numFPrimers;i++){
200                 string oligo = forPrimer[i];
201
202                 if(rawSequence.length() < oligo.length()){
203                         success = 0;
204                         break;
205                 }
206                 
207                 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
208                         seq.setUnaligned(rawSequence.substr(oligo.length()));
209                         matchFPrimerCount++;
210                         success = 1;
211                         break;
212                 }
213                 
214         }
215         totalFPrimerCount++;
216         return success;
217         
218 }
219
220 //***************************************************************************************************************
221
222 bool TrimSeqsCommand::stripReverse(Sequence& seq){
223         
224         string rawSequence = seq.getUnaligned();
225         bool success = 0;       //guilty until proven innocent
226         
227         for(int i=0;i<numRPrimers;i++){
228                 string oligo = revPrimer[i];
229                 
230                 if(rawSequence.length() < oligo.length()){
231                         success = 0;
232                         break;
233                 }
234                 
235                 if(rawSequence.compare(rawSequence.length()-oligo.length(),oligo.length(),oligo) == 0){
236                         seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length()));
237                         matchRPrimerCount++;
238                         success = 1;
239                         break;
240                 }
241                 
242         }
243         totalRPrimerCount++;
244         return success;
245         
246 }
247
248 //***************************************************************************************************************