]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
broke up globaldata and moved error checking and help into commands
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11
12 /**************************************************************************************/
13
14 FilterSeqsCommand::FilterSeqsCommand(string option){
15         try {
16                 globaldata = GlobalData::getInstance();
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"fasta", "trump", "soft", "hard", "vertical"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         parser = new OptionParser();
28                         parser->parse(option, parameters);  delete parser;
29                         
30                         ValidParameters* validParameter = new ValidParameters();
31                 
32                         //check to make sure all parameters are valid for command
33                         for (it = parameters.begin(); it != parameters.end(); it++) { 
34                                 if (validParameter->isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
35                         }
36                         
37                         //check for required parameters
38                         fastafile = validParameter->validFile(parameters, "fasta", true);
39                         if (fastafile == "not found") { cout << "fasta is a required parameter for the filter.seqs command." << endl; abort = true; }
40                         else if (fastafile == "not open") { abort = true; }     
41                         else { 
42                                 globaldata->setFastaFile(fastafile);
43                         }
44                         
45                         //check for optional parameter and set defaults
46                         // ...at some point should added some additional type checking...
47                         
48                         string temp;
49                         temp = validParameter->validFile(parameters, "trump", false);                           if (temp == "not found") { temp = "."; }
50                         trump = temp[0];
51                         
52                         temp = validParameter->validFile(parameters, "soft", false);                            if (temp == "not found") { soft = 0; }
53                         else {  soft = (float)atoi(temp.c_str()) / 100.0;  }
54                         
55                         hard = validParameter->validFile(parameters, "hard", true);                                     if (hard == "not found") { hard = ""; }
56                         else if (hard == "not open") { abort = true; }  
57                         
58                         vertical = validParameter->validFile(parameters, "vertical", false);            if (vertical == "not found") { vertical = "F"; }
59         
60                         delete validParameter;
61                         
62                         numSeqs = 0;
63                         
64                 }
65                 
66         }
67         catch(exception& e) {
68                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function FilterSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69                 exit(1);
70         }
71         catch(...) {
72                 cout << "An unknown error has occurred in the FilterSeqsCommand class function FilterSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
73                 exit(1);
74         }       
75 }
76
77 //**********************************************************************************************************************
78
79 void FilterSeqsCommand::help(){
80         try {
81                 cout << "The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file." << "\n";
82                 cout << "The filter.seqs command parameters are fasta, trump, soft, hard and vertical.  " << "\n";
83                 cout << "The fasta parameter is required." << "\n";
84                 cout << "The trump parameter .... The default is '.'" << "\n";
85                 cout << "The soft parameter .... The default is ...." << "\n";
86                 cout << "The hard parameter .... The default is ...." << "\n";
87                 cout << "The vertical parameter .... The default is F." << "\n";
88                 cout << "The filter.seqs command should be in the following format: " << "\n";
89                 cout << "filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) " << "\n";
90                 cout << "Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T)." << "\n";
91                 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n";
92                 
93         }
94         catch(exception& e) {
95                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
96                 exit(1);
97         }
98         catch(...) {
99                 cout << "An unknown error has occurred in the FilterSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
100                 exit(1);
101         }       
102 }
103
104 /**************************************************************************************/
105
106 void FilterSeqsCommand::doHard() {
107         
108         ifstream fileHandle;
109         openInputFile(hard, fileHandle);
110         
111         fileHandle >> filter;
112
113 }
114
115 /**************************************************************************************/
116
117 void FilterSeqsCommand::doTrump(Sequence seq) {
118         
119         string curAligned = seq.getAligned();
120         
121         for(int j = 0; j < alignmentLength; j++) {
122                 if(curAligned[j] == trump){
123                         filter[j] = '0';
124                 }
125         }
126
127 }
128
129 /**************************************************************************************/
130
131 void FilterSeqsCommand::doVertical() {
132
133         for(int i=0;i<alignmentLength;i++){
134                 if(gap[i] == numSeqs)   {       filter[i] = '0';        }
135         }
136         
137 }
138
139 /**************************************************************************************/
140
141 void FilterSeqsCommand::doSoft() {
142         
143         int threshold = int (soft * numSeqs);
144         bool keep = 0;
145         
146         for(int i=0;i<alignmentLength;i++){
147                 if(a[i] >= threshold)           {       keep = 1;       }
148                 else if(t[i] >= threshold)      {       keep = 1;       }
149                 else if(g[i] >= threshold)      {       keep = 1;       }
150                 else if(c[i] >= threshold)      {       keep = 1;       }
151                 
152                 if(keep == 0)   {       filter[i] = 0;          }
153         }
154 }
155
156 /**************************************************************************************/
157
158 void FilterSeqsCommand::getFreqs(Sequence seq) {
159
160         string curAligned = seq.getAligned();;
161         
162         for(int j=0;j<alignmentLength;j++){
163                 if(toupper(curAligned[j]) == 'A')                                                                               {       a[j]++;         }
164                 else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[j]) == 'U') {       t[j]++;         }
165                 else if(toupper(curAligned[j]) == 'G')                                                                  {       g[j]++;         }
166                 else if(toupper(curAligned[j]) == 'C')                                                                  {       c[j]++;         }
167                 else if(curAligned[j] == '-' || curAligned[j] == '.')                                   {       gap[j]++;       }
168         }
169         
170 }
171
172 /**************************************************************************************/
173
174 int FilterSeqsCommand::execute() {      
175         try {
176         
177                 if (abort == true) { return 0; }
178                 
179                 ifstream inFASTA;
180                 openInputFile(fastafile, inFASTA);
181                 
182                 Sequence testSeq(inFASTA);
183                 alignmentLength = testSeq.getAlignLength();
184                 inFASTA.seekg(0);
185                 
186                 if(soft != 0 || isTrue(vertical)){
187                         a.assign(alignmentLength, 0);
188                         t.assign(alignmentLength, 0);
189                         g.assign(alignmentLength, 0);
190                         c.assign(alignmentLength, 0);
191                         gap.assign(alignmentLength, 0);
192                 }
193                 
194                 if(hard.compare("") != 0)       {       doHard();               }
195                 else                                            {       filter = string(alignmentLength, '1');  }
196
197                 if(isTrue(vertical) || soft != 0){
198                 
199                         while(!inFASTA.eof()){
200                                 Sequence seq(inFASTA);
201                                 doTrump(seq);   
202                                 if(isTrue(vertical) || soft != 0){      getFreqs(seq);  }
203                                 numSeqs++;
204                                 cout.flush();
205                         }
206                 
207                 }
208                 inFASTA.close();
209                 
210                 if(isTrue(vertical) == 1)       {       doVertical();   }
211                 if(soft != 0)   {       doSoft();               }                       
212
213                 ofstream outFilter;
214                 string filterFile = getRootName(fastafile) + "filter";
215                 openOutputFile(filterFile, outFilter);
216                 outFilter << filter << endl;
217                 outFilter.close();
218                 
219
220                 openInputFile(fastafile, inFASTA);
221                 string filteredFasta = getRootName(fastafile) + "filter.fasta";
222                 ofstream outFASTA;
223                 openOutputFile(filteredFasta, outFASTA);
224
225                 numSeqs = 0;
226                 while(!inFASTA.eof()){
227                         Sequence seq(inFASTA);
228                         string align = seq.getAligned();
229                         string filterSeq = "";
230         
231                         for(int j=0;j<alignmentLength;j++){
232                                 if(filter[j] == '1'){
233                                         filterSeq += align[j];
234                                 }
235                         }
236
237                         outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
238                         numSeqs++;
239                         gobble(inFASTA);
240                 }
241                 outFASTA.close();
242                 inFASTA.close();
243                 
244                 
245                 int filteredLength = 0;
246                 for(int i=0;i<alignmentLength;i++){
247                         if(filter[i] == '1'){   filteredLength++;       }
248                 }
249                 
250                 cout << endl;
251                 cout << "Length of filtered alignment: " << filteredLength << endl;
252                 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
253                 cout << "Length of the original alignment: " << alignmentLength << endl;
254                 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
255                 
256                 globaldata->clear();
257                 
258                 return 0;
259                 
260         }
261         catch(exception& e) {
262                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
263                 exit(1);
264         }
265         catch(...) {
266                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
267                 exit(1);
268         }
269 }
270
271 /**************************************************************************************/