]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
modified trim.seqs command
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11
12 //***************************************************************************************************************
13
14 TrimSeqsCommand::TrimSeqsCommand(){
15         try {
16                 
17                 oligos = 0;             
18                 totalBarcodeCount = 0;
19                 matchBarcodeCount = 0;
20                 
21                 globaldata = GlobalData::getInstance();
22                 if(globaldata->getFastaFile() == ""){
23                         cout << "you need to at least enter a fasta file name" << endl;
24                 }
25                 
26                 if(isTrue(globaldata->getFlip()))               {       flip = 1;                               }
27                 if(globaldata->getOligosFile() != "")   {       oligos = 1;                             }
28                 if(!flip && !oligos)                                    {       cout << "huh?" << endl; }
29
30         }
31         catch(exception& e) {
32                 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
33                 exit(1);
34         }
35         catch(...) {
36                 cout << "An unknown error has occurred in the TrimSeqsCommand class function TrimSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
37                 exit(1);
38         }       
39 }
40
41 //***************************************************************************************************************
42
43 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
44
45 //***************************************************************************************************************
46
47 int TrimSeqsCommand::execute(){
48         try{
49                 getOligos();
50                 
51                 ifstream inFASTA;
52                 openInputFile(globaldata->getFastaFile(), inFASTA);
53
54                 ofstream outFASTA;
55                 string trimSeqFile = getRootName(globaldata->getFastaFile()) + "trim.fasta";
56                 openOutputFile(trimSeqFile, outFASTA);
57                 
58                 ofstream outGroups;
59                 string groupFile = getRootName(globaldata->getFastaFile()) + "groups"; 
60                 openOutputFile(groupFile, outGroups);
61
62                 ofstream scrapFASTA;
63                 string scrapSeqFile = getRootName(globaldata->getFastaFile()) + "scrap.fasta";
64                 openOutputFile(scrapSeqFile, scrapFASTA);
65
66                 bool success;
67                 
68                 while(!inFASTA.eof()){
69                         Sequence currSeq(inFASTA);
70                         string origSeq = currSeq.getUnaligned();
71                         string group;
72                         string trashCode = "";
73
74                         if(barcodes.size() != 0){
75                                 success = stripBarcode(currSeq, group);
76                                 if(!success){   trashCode += 'b';       }
77                         }
78                         if(numFPrimers != 0){
79                                 success = stripForward(currSeq);
80                                 if(!success){   trashCode += 'f';       }
81                         }
82                         if(numRPrimers != 0){
83                                 success = stripReverse(currSeq);
84                                 if(!success){   trashCode += 'r';       }
85                         }
86                         if(flip){       currSeq.reverseComplement();    }               // should go last                       
87
88                         if(trashCode.length() == 0){
89                                 currSeq.printSequence(outFASTA);
90                                 outGroups << currSeq.getName() << '\t' << group << endl;
91                         }
92                         else{
93                                 currSeq.setName(currSeq.getName() + '|' + trashCode);
94                                 currSeq.setUnaligned(origSeq);
95                                 currSeq.printSequence(scrapFASTA);
96                         }
97                         
98                         gobble(inFASTA);
99                 }
100                 inFASTA.close();
101                 outFASTA.close();
102                 scrapFASTA.close();
103                 outGroups.close();
104                 
105                 return 0;               
106         }
107         catch(exception& e) {
108                 cout << "Standard Error: " << e.what() << " has occurred in the TrimSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
109                 exit(1);
110         }
111         catch(...) {
112                 cout << "An unknown error has occurred in the TrimSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
113                 exit(1);
114         }
115 }
116
117 //***************************************************************************************************************
118
119 void TrimSeqsCommand::getOligos(){
120
121         ifstream inOligos;
122         openInputFile(globaldata->getOligosFile(), inOligos);
123
124         string type, oligo, group;
125         
126         while(!inOligos.eof()){
127                 inOligos >> type;
128                 
129                 if(type == "forward"){
130                         inOligos >> oligo;
131                         forPrimer.push_back(oligo);
132                 }
133                 else if(type == "reverse"){
134                         inOligos >> oligo;
135                         revPrimer.push_back(oligo);
136                 }
137                 else if(type == "barcode"){
138                         inOligos >> oligo >> group;
139                         barcodes[oligo]=group;
140                 }
141                 else if(type[0] == '#'){
142                         char c;
143                         while ((c = inOligos.get()) != EOF)     {       if (c == 10){   break;  }       } // get rest of line
144                 }
145                 
146                 gobble(inOligos);
147         }
148
149         numFPrimers = forPrimer.size();
150         numRPrimers = revPrimer.size();
151 }
152
153 //***************************************************************************************************************
154
155 bool TrimSeqsCommand::stripBarcode(Sequence& seq, string& group){
156         
157         string rawSequence = seq.getUnaligned();
158         bool success = 0;       //guilty until proven innocent
159
160         for(map<string,string>::iterator it=barcodes.begin();it!=barcodes.end();it++){
161                 string oligo = it->first;
162                 
163                 if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
164                         success = 0;
165                         break;
166                 }
167                 
168                 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
169                         group = it->second;
170                         seq.setUnaligned(rawSequence.substr(oligo.length()));
171                         matchBarcodeCount++;
172                         success = 1;
173                         break;
174                 }
175         }
176         totalBarcodeCount++;
177         return success;
178         
179 }
180
181 //***************************************************************************************************************
182
183 bool TrimSeqsCommand::stripForward(Sequence& seq){
184         
185         string rawSequence = seq.getUnaligned();
186         bool success = 0;       //guilty until proven innocent
187         
188         for(int i=0;i<numFPrimers;i++){
189                 string oligo = forPrimer[i];
190
191                 if(rawSequence.length() < oligo.length()){
192                         success = 0;
193                         break;
194                 }
195                 
196                 if (rawSequence.compare(0,oligo.length(),oligo) == 0){
197                         seq.setUnaligned(rawSequence.substr(oligo.length()));
198                         matchFPrimerCount++;
199                         success = 1;
200                         break;
201                 }
202         }
203         
204         totalFPrimerCount++;
205         return success;
206         
207 }
208
209 //***************************************************************************************************************
210
211 bool TrimSeqsCommand::stripReverse(Sequence& seq){
212         
213         string rawSequence = seq.getUnaligned();
214         bool success = 0;       //guilty until proven innocent
215         
216         for(int i=0;i<numRPrimers;i++){
217                 string oligo = revPrimer[i];
218                 
219                 if(rawSequence.length() < oligo.length()){
220                         success = 0;
221                         break;
222                 }
223                 
224                 if(rawSequence.compare(rawSequence.length()-oligo.length(),oligo.length(),oligo) == 0){
225                         seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length()));
226                         matchRPrimerCount++;
227                         success = 1;
228                         break;
229                 }
230         }       
231         
232         totalRPrimerCount++;
233         return success;
234         
235 }
236
237 //***************************************************************************************************************