]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
259adc1485222c2070e44c2f1b0bb32e3c86815d
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11
12 //***************************************************************************************************************
13
14 TrimSeqsCommand::TrimSeqsCommand(){
15         try {
16                 
17                 globaldata = GlobalData::getInstance();
18                 
19                 oligos = 0;             
20
21                 if(globaldata->getFastaFile() == ""){
22                         cout << "you need to at least enter a fasta file name" << endl;
23                 }
24                 
25                 if(isTrue(globaldata->getFlip()))                       {       flip = 1;                                                                                                       }
26                 if(globaldata->getOligosFile() != "")           {       oligos = 1;                                                                                                     }
27                 
28                 if(globaldata->getMaxAmbig() != "-1")           {       maxAmbig = atoi(globaldata->getMaxAmbig().c_str());                     }
29                 else                                                                            {       maxAmbig = -1;                                                                                          }
30                 
31                 if(globaldata->getMaxHomoPolymer() != "-1")     {       maxHomoP = atoi(globaldata->getMaxHomoPolymer().c_str());       }
32                 else                                                                            {       maxHomoP = 0;                                                                                           }
33                 
34                 if(globaldata->getMinLength() != "-1")          {       minLength = atoi(globaldata->getMinLength().c_str());           }
35                 else                                                                            {       minLength = 0;                                                                                          }
36                 
37                 if(globaldata->getMaxLength() != "-1")          {       maxLength = atoi(globaldata->getMaxLength().c_str());           }
38                 else                                                                            {       maxLength = 0;                                                                                          }
39                 
40                 if(!flip && !oligos && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP ){       cout << "huh?" << endl; }
41
42         }
43         catch(exception& e) {
44                 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
45                 exit(1);
46         }
47         catch(...) {
48                 cout << "An unknown error has occurred in the TrimSeqsCommand class function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49                 exit(1);
50         }       
51 }
52
53 //***************************************************************************************************************
54
55 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
56
57 //***************************************************************************************************************
58
59 int TrimSeqsCommand::execute(){
60         try{
61                 getOligos();
62                 
63                 ifstream inFASTA;
64                 openInputFile(globaldata->getFastaFile(), inFASTA);
65
66                 ofstream outFASTA;
67                 string trimSeqFile = getRootName(globaldata->getFastaFile()) + "trim.fasta";
68                 openOutputFile(trimSeqFile, outFASTA);
69                 
70                 ofstream outGroups;
71                 string groupFile = getRootName(globaldata->getFastaFile()) + "groups"; 
72                 openOutputFile(groupFile, outGroups);
73
74                 ofstream scrapFASTA;
75                 string scrapSeqFile = getRootName(globaldata->getFastaFile()) + "scrap.fasta";
76                 openOutputFile(scrapSeqFile, scrapFASTA);
77
78                 bool success;
79                 
80                 while(!inFASTA.eof()){
81                         Sequence currSeq(inFASTA);
82                         string origSeq = currSeq.getUnaligned();
83                         string group;
84                         string trashCode = "";
85
86                         if(barcodes.size() != 0){
87                                 success = stripBarcode(currSeq, group);
88                                 if(!success){   trashCode += 'b';       }
89                         }
90                         if(numFPrimers != 0){
91                                 success = stripForward(currSeq);
92                                 if(!success){   trashCode += 'f';       }
93                         }
94                         if(numRPrimers != 0){
95                                 success = stripReverse(currSeq);
96                                 if(!success){   trashCode += 'r';       }
97                         }
98                         if(minLength > 0 || maxLength > 0){
99                                 success = cullLength(currSeq);
100                                 if(!success){   trashCode += 'l';       }
101                         }
102                         if(maxHomoP > 0){
103                                 success = cullHomoP(currSeq);
104                                 if(!success){   trashCode += 'h';       }
105                         }
106                         if(maxAmbig != -1){
107                                 success = cullAmbigs(currSeq);
108                                 if(!success){   trashCode += 'n';       }
109                         }
110                         
111                         if(flip){       currSeq.reverseComplement();    }               // should go last                       
112
113                         if(trashCode.length() == 0){
114                                 currSeq.printSequence(outFASTA);
115                                 outGroups << currSeq.getName() << '\t' << group << endl;
116                         }
117                         else{
118                                 currSeq.setName(currSeq.getName() + '|' + trashCode);
119                                 currSeq.setUnaligned(origSeq);
120                                 currSeq.printSequence(scrapFASTA);
121                         }
122                         
123                         gobble(inFASTA);
124                 }
125                 inFASTA.close();
126                 outFASTA.close();
127                 scrapFASTA.close();
128                 outGroups.close();
129                 
130                 return 0;               
131         }
132         catch(exception& e) {
133                 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
134                 exit(1);
135         }
136         catch(...) {
137                 cout << "An unknown error has occurred in the TrimSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
138                 exit(1);
139         }
140 }
141
142 //***************************************************************************************************************
143
144 void TrimSeqsCommand::getOligos(){
145
146         ifstream inOligos;
147         openInputFile(globaldata->getOligosFile(), inOligos);
148
149         string type, oligo, group;
150         
151         while(!inOligos.eof()){
152                 inOligos >> type;
153                 
154                 if(type == "forward"){
155                         inOligos >> oligo;
156                         forPrimer.push_back(oligo);
157                 }
158                 else if(type == "reverse"){
159                         inOligos >> oligo;
160                         revPrimer.push_back(oligo);
161                 }
162                 else if(type == "barcode"){
163                         inOligos >> oligo >> group;
164                         barcodes[oligo]=group;
165                 }
166                 else if(type[0] == '#'){
167                         char c;
168                         while ((c = inOligos.get()) != EOF)     {       if (c == 10){   break;  }       } // get rest of line
169                 }
170                 
171                 gobble(inOligos);
172         }
173
174         numFPrimers = forPrimer.size();
175         numRPrimers = revPrimer.size();
176 }
177
178 //***************************************************************************************************************
179
180 bool TrimSeqsCommand::stripBarcode(Sequence& seq, string& group){
181         
182         string rawSequence = seq.getUnaligned();
183         bool success = 0;       //guilty until proven innocent
184
185         for(map<string,string>::iterator it=barcodes.begin();it!=barcodes.end();it++){
186                 string oligo = it->first;
187                 
188                 if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
189                         success = 0;
190                         break;
191                 }
192                 
193                 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
194                         group = it->second;
195                         seq.setUnaligned(rawSequence.substr(oligo.length()));
196                         success = 1;
197                         break;
198                 }
199         }
200         return success;
201         
202 }
203
204 //***************************************************************************************************************
205
206 bool TrimSeqsCommand::stripForward(Sequence& seq){
207         
208         string rawSequence = seq.getUnaligned();
209         bool success = 0;       //guilty until proven innocent
210         
211         for(int i=0;i<numFPrimers;i++){
212                 string oligo = forPrimer[i];
213
214                 if(rawSequence.length() < oligo.length()){
215                         success = 0;
216                         break;
217                 }
218                 
219                 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
220                         seq.setUnaligned(rawSequence.substr(oligo.length()));
221                         success = 1;
222                         break;
223                 }
224         }
225         
226         return success;
227         
228 }
229
230 //***************************************************************************************************************
231
232 bool TrimSeqsCommand::stripReverse(Sequence& seq){
233         
234         string rawSequence = seq.getUnaligned();
235         bool success = 0;       //guilty until proven innocent
236         
237         for(int i=0;i<numRPrimers;i++){
238                 string oligo = revPrimer[i];
239                 
240                 if(rawSequence.length() < oligo.length()){
241                         success = 0;
242                         break;
243                 }
244                 
245                 if(rawSequence.compare(rawSequence.length()-oligo.length(),oligo.length(),oligo) == 0){
246                         seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length()));
247                         success = 1;
248                         break;
249                 }
250         }       
251         return success;
252         
253 }
254
255 //***************************************************************************************************************
256
257 bool TrimSeqsCommand::cullLength(Sequence& seq){
258         
259         int length = seq.getNumBases();
260         bool success = 0;       //guilty until proven innocent
261         
262         if(length >= minLength && maxLength == 0)                       {       success = 1;    }
263         else if(length >= minLength && length <= maxLength)     {       success = 1;    }
264         else                                                                                            {       success = 0;    }
265         
266         return success;
267         
268 }
269
270 //***************************************************************************************************************
271
272 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
273         
274         int longHomoP = seq.getLongHomoPolymer();
275         bool success = 0;       //guilty until proven innocent
276         
277         if(longHomoP <= maxHomoP){      success = 1;    }
278         else                                    {       success = 0;    }
279         
280         return success;
281         
282 }
283
284 //***************************************************************************************************************
285
286 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
287         
288         int numNs = seq.getAmbigBases();
289         bool success = 0;       //guilty until proven innocent
290         
291         if(numNs <= maxAmbig){  success = 1;    }
292         else                                    {       success = 0;    }
293         
294         return success;
295         
296 }
297
298 //***************************************************************************************************************