]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
*** empty log message ***
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11
12 /**************************************************************************************/
13
14 FilterSeqsCommand::FilterSeqsCommand(){
15         globaldata = GlobalData::getInstance();
16         
17         if(globaldata->getFastaFile() == "")            {       cout << "You must enter a fasta formatted file" << endl;        }
18         trump = globaldata->getTrump()[0];
19 //      vertical = 
20 //      readSeqs->read();
21 //      db = readSeqs->getDB();
22 //      numSeqs = db->size();
23 //      
24 //      alignmentLength = db->get(0).getAlignLength();
25 //
26 //      filter = string(alignmentLength, '1');
27 }
28
29 /**************************************************************************************/
30
31 void FilterSeqsCommand::doHard() {
32         
33         string hardName = globaldata->getHard();
34         string hardFilter = "";
35                 
36         ifstream fileHandle;
37         openInputFile(hardName, fileHandle);
38         
39         fileHandle >> hardFilter;
40         
41         if(hardFilter.length() != filter.length()){
42                 cout << "The hard filter is not the same length as the alignment: Hard filter will not be applied." << endl;
43         }
44         else{
45                 filter = hardFilter;
46         }
47
48 }
49
50 /**************************************************************************************/
51
52 void FilterSeqsCommand::doTrump(Sequence seq) {
53
54         
55 //      for(int i = 0; i < numSeqs; i++) {
56 //              string curAligned = db->get(i).getAligned();;
57 //
58 //              for(int j = 0; j < alignmentLength; j++) {
59 //                      if(curAligned[j] == trump){
60 //                              filter[j] = '0';
61 //                      }
62 //              }
63 //      }
64
65 }
66
67 /**************************************************************************************/
68
69 void FilterSeqsCommand::doVertical(Sequence seq) {
70
71 //      vector<int> counts(alignmentLength, 0);
72 //      
73 //      for(int i = 0; i < numSeqs; i++) {
74 //              string curAligned = db->get(i).getAligned();;
75 //              
76 //              for(int j = 0; j < alignmentLength; j++) {
77 //                      if(curAligned[j] == '-' || curAligned[j] == '.'){
78 //                              counts[j]++;
79 //                      }
80 //              }
81 //      }
82 //      for(int i=0;i<alignmentLength;i++){
83 //              if(counts[i] == numSeqs)        {       filter[i] = '0';                }
84 //      }
85 }
86
87 /**************************************************************************************/
88
89 void FilterSeqsCommand::doSoft(Sequence seq) {
90
91 //      int softThreshold = numSeqs * (float)atoi(globaldata->getSoft().c_str()) / 100.0;
92 //
93 //      vector<int> a(alignmentLength, 0);
94 //      vector<int> t(alignmentLength, 0);
95 //      vector<int> g(alignmentLength, 0);
96 //      vector<int> c(alignmentLength, 0);
97 //      vector<int> x(alignmentLength, 0);
98 //      
99 //      for(int i=0;i<numSeqs;i++){
100 //              string curAligned = db->get(i).getAligned();;
101 //
102 //              for(int j=0;j<alignmentLength;j++){
103 //                      if(toupper(curAligned[j]) == 'A')                                                                               {       a[j]++; }
104 //                      else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[i]) == 'U') {       t[j]++; }
105 //                      else if(toupper(curAligned[j]) == 'G')                                                                  {       g[j]++; }
106 //                      else if(toupper(curAligned[j]) == 'C')                                                                  {       c[j]++; }
107 //              }
108 //      }
109 //
110 //      for(int i=0;i<alignmentLength;i++){
111 //              if(a[i] < softThreshold && t[i] < softThreshold && g[i] < softThreshold && c[i] < softThreshold){
112 //                      filter[i] = '0';                        
113 //              }
114 //      }
115 }
116
117 /**************************************************************************************/
118
119 int FilterSeqsCommand::execute() {      
120         try {
121                 ifstream inFASTA;
122                 openInputFile(globaldata->getFastaFile(), inFASTA);
123                 
124                 Sequence testSeq(inFASTA);
125                 alignmentLength = testSeq.getAlignLength();
126                 inFASTA.seekg(0);
127                 int numSeqs = 0;
128                 
129                 if(globaldata->getHard().compare("") != 0)      {       doHard();                                                               }
130                 else                                                                            {       filter = string(alignmentLength, '1');  }
131                 while(!inFASTA.eof()){
132                         Sequence seq(inFASTA);
133                         if(globaldata->getTrump().compare("") != 0)     {       doTrump(seq);           }
134                         if(isTrue(globaldata->getVertical()) == 1)      {       doVertical(seq);        }
135 //                      if(globaldata->getSoft().compare("") != 0)      {       doSoft(seq);            }                       
136                         numSeqs++;
137                 }
138                 
139                 ofstream outfile;
140                 string filterFile = getRootName(globaldata->inputFileName) + "filter";
141                 openOutputFile(filterFile, outfile);
142
143                 outfile << filter << endl;
144                 outfile.close();
145                 
146                 string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta";
147                 openOutputFile(filteredFasta, outfile);
148
149 //              for(int i=0;i<numSeqs;i++){
150 //                      string curAligned = db->get(i).getAligned();
151 //                      outfile << '>' << db->get(i).getName() << endl;
152 //                      for(int j=0;j<alignmentLength;j++){
153 //                              if(filter[j] == '1'){
154 //                                      outfile << curAligned[j];
155 //                              }
156 //                      }
157 //                      outfile << endl;
158 //              }
159 //              outfile.close();
160                 
161                 int filteredLength = 0;
162                 for(int i=0;i<alignmentLength;i++){
163                         if(filter[i] == '1'){   filteredLength++;       }
164                 }
165                 
166                 cout << endl;
167                 cout << "Length of filtered alignment: " << filteredLength << endl;
168                 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
169                 cout << "Length of the original alignment: " << alignmentLength << endl;
170                 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
171                 
172                 globaldata->clear();
173                 
174                 return 0;
175                 
176         }
177         catch(exception& e) {
178                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179                 exit(1);
180         }
181         catch(...) {
182                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
183                 exit(1);
184         }
185 }
186
187 /**************************************************************************************/