]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
pat's edits of screen.seqs and summary.seqs
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11
12 /**************************************************************************************/
13
14 FilterSeqsCommand::FilterSeqsCommand(){
15         globaldata = GlobalData::getInstance();
16         
17         if(globaldata->getFastaFile() == "")            {       cout << "You must enter a fasta formatted file" << endl;        }
18         trump = globaldata->getTrump()[0];
19         vertical = 
20 //      readSeqs->read();
21 //      db = readSeqs->getDB();
22 //      numSeqs = db->size();
23 //      
24 //      alignmentLength = db->get(0).getAlignLength();
25 //
26 //      filter = string(alignmentLength, '1');
27 }
28
29 /**************************************************************************************/
30
31 void FilterSeqsCommand::doHard() {
32         
33 //      string hardName = globaldata->getHard();
34 //      string hardFilter = "";
35 //              
36 //      ifstream fileHandle;
37 //      openInputFile(hardName, fileHandle);
38 //      
39 //      fileHandle >> hardFilter;
40 //      
41 //      if(hardFilter.length() != filter.length()){
42 //              cout << "The hard filter is not the same length as the alignment: Hard filter will not be applied." << endl;
43 //      }
44 //      else{
45 //              filter = hardFilter;
46 //      }
47
48 }
49
50 /**************************************************************************************/
51
52 void FilterSeqsCommand::doTrump() {
53
54         
55         for(int i = 0; i < numSeqs; i++) {
56                 string curAligned = db->get(i).getAligned();;
57
58                 for(int j = 0; j < alignmentLength; j++) {
59                         if(curAligned[j] == trump){
60                                 filter[j] = '0';
61                         }
62                 }
63         }
64
65 }
66
67 /**************************************************************************************/
68
69 void FilterSeqsCommand::doVertical() {
70
71 //      vector<int> counts(alignmentLength, 0);
72 //      
73 //      for(int i = 0; i < numSeqs; i++) {
74 //              string curAligned = db->get(i).getAligned();;
75 //              
76 //              for(int j = 0; j < alignmentLength; j++) {
77 //                      if(curAligned[j] == '-' || curAligned[j] == '.'){
78 //                              counts[j]++;
79 //                      }
80 //              }
81 //      }
82 //      for(int i=0;i<alignmentLength;i++){
83 //              if(counts[i] == numSeqs)        {       filter[i] = '0';                }
84 //      }
85 }
86
87 /**************************************************************************************/
88
89 void FilterSeqsCommand::doSoft() {
90
91 //      int softThreshold = numSeqs * (float)atoi(globaldata->getSoft().c_str()) / 100.0;
92 //
93 //      vector<int> a(alignmentLength, 0);
94 //      vector<int> t(alignmentLength, 0);
95 //      vector<int> g(alignmentLength, 0);
96 //      vector<int> c(alignmentLength, 0);
97 //      vector<int> x(alignmentLength, 0);
98 //      
99 //      for(int i=0;i<numSeqs;i++){
100 //              string curAligned = db->get(i).getAligned();;
101 //
102 //              for(int j=0;j<alignmentLength;j++){
103 //                      if(toupper(curAligned[j]) == 'A')                                                                               {       a[j]++; }
104 //                      else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[i]) == 'U') {       t[j]++; }
105 //                      else if(toupper(curAligned[j]) == 'G')                                                                  {       g[j]++; }
106 //                      else if(toupper(curAligned[j]) == 'C')                                                                  {       c[j]++; }
107 //              }
108 //      }
109 //
110 //      for(int i=0;i<alignmentLength;i++){
111 //              if(a[i] < softThreshold && t[i] < softThreshold && g[i] < softThreshold && c[i] < softThreshold){
112 //                      filter[i] = '0';                        
113 //              }
114 //      }
115 }
116
117 /**************************************************************************************/
118
119 int FilterSeqsCommand::execute() {      
120         try {
121                 
122                 ifstream inFASTA;
123                 openInputFile(globaldata->getFastaFile(), inFASTA);
124                 
125                 Sequence currSequence(inFASTA);
126                 alignmentLength = currSequence.getAlignLength();
127                 
128                 //while
129                 
130                 
131                 if(globaldata->getHard().compare("") != 0)      {       doHard();               }       //      has to be applied first!
132                 if(globaldata->getTrump().compare("") != 0)     {       doTrump();              }
133                 if(isTrue(globaldata->getVertical()))           {       doVertical();   }
134                 if(globaldata->getSoft().compare("") != 0)      {       doSoft();               }
135
136                 
137                 
138                 
139                 
140                 
141                 
142                 ofstream outfile;
143                 string filterFile = getRootName(globaldata->inputFileName) + "filter";
144                 openOutputFile(filterFile, outfile);
145
146                 outfile << filter << endl;
147                 outfile.close();
148                 
149                 string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta";
150                 openOutputFile(filteredFasta, outfile);
151
152                 for(int i=0;i<numSeqs;i++){
153                         string curAligned = db->get(i).getAligned();
154                         outfile << '>' << db->get(i).getName() << endl;
155                         for(int j=0;j<alignmentLength;j++){
156                                 if(filter[j] == '1'){
157                                         outfile << curAligned[j];
158                                 }
159                         }
160                         outfile << endl;
161                 }
162                 outfile.close();
163                 
164                 int filteredLength = 0;
165                 for(int i=0;i<alignmentLength;i++){
166                         if(filter[i] == '1'){   filteredLength++;       }
167                 }
168                 
169                 cout << endl;
170                 cout << "Length of filtered alignment: " << filteredLength << endl;
171                 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
172                 cout << "Length of the original alignment: " << alignmentLength << endl;
173                 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
174                 
175                 globaldata->clear();
176                 
177                 return 0;
178                 
179         }
180         catch(exception& e) {
181                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
182                 exit(1);
183         }
184         catch(...) {
185                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
186                 exit(1);
187         }
188 }
189
190 /**************************************************************************************/