]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
filterseqscommand added
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 __MyCompanyName__. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11 #include <iostream>
12 #include <fstream>
13
14 /**************************************************************************************/
15 void FilterSeqsCommand::doTrump() {
16         trump = globaldata->getTrump();
17         for(int i = 0; i < db->size(); i++) {
18                 Sequence cur = db->get(i);
19                 string curAligned = cur.getAligned();
20                 for(int j = 0; j < curAligned.length(); j++) {
21                         string curChar = curAligned.substr(j, 1);
22                         if(curChar.compare(trump) == 0) 
23                                 columnsToRemove[j] = true;
24                 }
25         }
26 }
27
28 /**************************************************************************************/
29 void FilterSeqsCommand::doSoft() {
30         soft = atoi(globaldata->getSoft().c_str());
31         vector<vector<int> > columnSymbolSums;
32         vector<vector<string> > columnSymbols;
33         for(int i = 0; i < db->get(0).getLength(); i++) {
34                 vector<string> symbols;
35                 vector<int> sums;
36                 columnSymbols.push_back(symbols);
37                 columnSymbolSums.push_back(sums);
38         }
39         
40         for(int i = 0; i < db->size(); i++) {
41                 Sequence cur = db->get(i);
42                 string curAligned = cur.getAligned();
43                 
44                 for(int j = 0; j < curAligned.length(); j++) {
45                         string curChar = curAligned.substr(j, 1);
46                         vector<string> curColumnSymbols = columnSymbols[j];
47                         bool newSymbol = true;
48                         
49                         for(int k = 0; k < curColumnSymbols.size(); k++) 
50                                 if(curChar.compare(curColumnSymbols[k]) == 0) {
51                                         newSymbol = false;
52                                         columnSymbolSums[j][k]++;
53                                 }
54                         
55                         if(newSymbol) {
56                                 columnSymbols[j].push_back(curChar);
57                                 columnSymbolSums[j].push_back(1);
58                         }
59                 }
60         }
61         
62         
63         for(int i = 0; i < columnSymbolSums.size(); i++) {
64                 int totalSum = 0;
65                 int max = 0;
66                 vector<int> curColumnSymbols = columnSymbolSums[i];
67                 
68                 for(int j = 0; j < curColumnSymbols.size(); j++) {
69                         int curSum = curColumnSymbols[j];
70                         //cout << columnSymbols[i][j] << ": " << curSum << "\n";
71                         if(curSum > max)
72                                 max = curSum;
73                         totalSum += curSum;
74                 }
75                 //cout << "\n";
76                 
77                 if((double)max/(double)totalSum * 100 < soft)
78                         columnsToRemove[i] = true;
79         }
80 }
81
82 /**************************************************************************************/
83 void FilterSeqsCommand::doFilter() {
84         filter = globaldata->getFilter();
85         ifstream filehandle;
86         openInputFile(filter, filehandle);
87         
88         char c;
89         int count = 0;
90         while(!filehandle.eof()) {
91                 c = filehandle.get();
92                 if(c == '0') 
93                         columnsToRemove[count] = true;
94                 count++;
95         }
96 }
97
98 /**************************************************************************************/
99 int FilterSeqsCommand::execute() {      
100         try {
101                 globaldata = GlobalData::getInstance();
102                 db = globaldata->gSequenceDB;
103                 
104                 for(int i = 0; i < db->get(0).getLength(); i++) 
105                         columnsToRemove.push_back(false);
106                 
107                                 
108                 if(globaldata->getTrump().compare("") != 0) 
109                         doTrump();
110                 else if(globaldata->getSoft().compare("") != 0)
111                         doSoft();
112                         
113                 else if(globaldata->getFilter().compare("") != 0) 
114                         doFilter();
115                 
116                 //for(int i = 0; i < columnsToRemove.size(); i++)
117 //              {
118 //                      cout << "Remove Column " << i << " = ";
119 //                      if(columnsToRemove[i])
120 //                              cout << "true\n";
121 //                      else
122 //                              cout << "false\n";
123 //              }
124                 //Creating the new SequenceDB 
125                 SequenceDB newDB;
126                 for(int i = 0; i < db->size(); i++) {
127                         Sequence curSeq = db->get(i);
128                         string curAligned = curSeq.getAligned();
129                         string curName = curSeq.getName();
130                         string newAligned = "";
131                         for(int j = 0; j < curAligned.length(); j++) 
132                                 if(!columnsToRemove[j]) 
133                                         newAligned += curAligned.substr(j, 1);
134                         
135                         Sequence newSeq(curName, newAligned);
136                         newDB.add(newSeq);
137                 }
138                 
139                 ofstream outfile;
140                 outfile.open("filtertest.txt");
141                 newDB.print(outfile);
142                 outfile.close();
143                         
144                 return 0;
145         }
146         catch(exception& e) {
147                 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
148                 exit(1);
149         }
150         catch(...) {
151                 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
152                 exit(1);
153         }
154 }
155 /**************************************************************************************/