]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
e493cbfbf84f90fb0b2c30c58dc7619ca21c437b
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11
12 /**************************************************************************************/
13
14 FilterSeqsCommand::FilterSeqsCommand(){
15
16         globaldata = GlobalData::getInstance();
17         
18         if(globaldata->getFastaFile() == "")            {       cout << "You must enter a fasta formatted file" << endl;        }
19         trump = globaldata->getTrump()[0];
20         numSeqs = 0;
21
22 }
23
24 /**************************************************************************************/
25
26 void FilterSeqsCommand::doHard() {
27         
28         string hardName = globaldata->getHard();
29         string hardFilter = "";
30                 
31         ifstream fileHandle;
32         openInputFile(hardName, fileHandle);
33         
34         fileHandle >> filter;
35
36 }
37
38 /**************************************************************************************/
39
40 void FilterSeqsCommand::doTrump(Sequence seq) {
41         
42         string curAligned = seq.getAligned();
43         
44         for(int j = 0; j < alignmentLength; j++) {
45                 if(curAligned[j] == trump){
46                         filter[j] = '0';
47                 }
48         }
49
50 }
51
52 /**************************************************************************************/
53
54 void FilterSeqsCommand::doVertical() {
55
56         for(int i=0;i<alignmentLength;i++){
57                 if(gap[i] == numSeqs)   {       filter[i] = '0';        }
58         }
59         
60 }
61
62 /**************************************************************************************/
63
64 void FilterSeqsCommand::doSoft() {
65         
66         int threshold = int (soft * numSeqs);
67         bool keep = 0;
68         
69         for(int i=0;i<alignmentLength;i++){
70                 if(a[i] >= threshold)           {       keep = 1;       }
71                 else if(t[i] >= threshold)      {       keep = 1;       }
72                 else if(g[i] >= threshold)      {       keep = 1;       }
73                 else if(c[i] >= threshold)      {       keep = 1;       }
74                 
75                 if(keep == 0)   {       filter[i] = 0;          }
76         }
77         
78 }
79
80 /**************************************************************************************/
81
82 void FilterSeqsCommand::getFreqs(Sequence seq) {
83
84         string curAligned = seq.getAligned();;
85         
86         for(int j=0;j<alignmentLength;j++){
87                 if(toupper(curAligned[j]) == 'A')                                                                               {       a[j]++;         }
88                 else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[j]) == 'U') {       t[j]++;         }
89                 else if(toupper(curAligned[j]) == 'G')                                                                  {       g[j]++;         }
90                 else if(toupper(curAligned[j]) == 'C')                                                                  {       c[j]++;         }
91                 else if(curAligned[j] == '-' || curAligned[j] == '.')                                   {       gap[j]++;       }
92         }
93         
94 }
95
96 /**************************************************************************************/
97
98 int FilterSeqsCommand::execute() {      
99         try {
100                 ifstream inFASTA;
101                 openInputFile(globaldata->getFastaFile(), inFASTA);
102                 
103                 Sequence testSeq(inFASTA);
104                 alignmentLength = testSeq.getAlignLength();
105                 inFASTA.seekg(0);
106                 
107                 if(globaldata->getSoft() != "" || isTrue(globaldata->getVertical())){
108                         a.assign(alignmentLength, 0);
109                         t.assign(alignmentLength, 0);
110                         g.assign(alignmentLength, 0);
111                         c.assign(alignmentLength, 0);
112                         gap.assign(alignmentLength, 0);
113                 }
114                 if(globaldata->getSoft() != ""){
115                         soft = (float)atoi(globaldata->getSoft().c_str()) / 100.0;
116                 }
117                 
118                 if(globaldata->getHard().compare("") != 0)      {       doHard();                                                               }
119                 else                                                                            {       filter = string(alignmentLength, '1');  }
120
121                 if(globaldata->getTrump().compare("") != 0 || isTrue(globaldata->getVertical()) || globaldata->getSoft().compare("") != 0){
122                 
123                         while(!inFASTA.eof()){
124                                 Sequence seq(inFASTA);
125                                 if(globaldata->getTrump().compare("") != 0)     {       doTrump(seq);           }
126                                 if(isTrue(globaldata->getVertical()) || globaldata->getSoft().compare("") != 0){        getFreqs(seq);  }
127                                 numSeqs++;
128                                 cout.flush();
129                         }
130                 
131                 }
132                 inFASTA.close();
133                 
134                 if(isTrue(globaldata->getVertical()) == 1)      {       doVertical();   }
135                 if(globaldata->getSoft().compare("") != 0)      {       doSoft();               }                       
136
137                 ofstream outFilter;
138                 string filterFile = getRootName(globaldata->inputFileName) + "filter";
139                 openOutputFile(filterFile, outFilter);
140                 outFilter << filter << endl;
141                 outFilter.close();
142                 
143
144                 openInputFile(globaldata->getFastaFile(), inFASTA);
145                 string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta";
146                 ofstream outFASTA;
147                 openOutputFile(filteredFasta, outFASTA);
148
149                 numSeqs = 0;
150                 while(!inFASTA.eof()){
151                         Sequence seq(inFASTA);
152                         string align = seq.getAligned();
153                         string filterSeq = "";
154         
155                         for(int j=0;j<alignmentLength;j++){
156                                 if(filter[j] == '1'){
157                                         filterSeq += align[j];
158                                 }
159                         }
160
161                         outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
162                         numSeqs++;
163                         gobble(inFASTA);
164                 }
165                 outFASTA.close();
166                 inFASTA.close();
167                 
168                 
169                 int filteredLength = 0;
170                 for(int i=0;i<alignmentLength;i++){
171                         if(filter[i] == '1'){   filteredLength++;       }
172                 }
173                 
174                 cout << endl;
175                 cout << "Length of filtered alignment: " << filteredLength << endl;
176                 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
177                 cout << "Length of the original alignment: " << alignmentLength << endl;
178                 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
179                 
180                 globaldata->clear();
181                 
182                 return 0;
183                 
184         }
185         catch(exception& e) {
186                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
187                 exit(1);
188         }
189         catch(...) {
190                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
191                 exit(1);
192         }
193 }
194
195 /**************************************************************************************/