2 * filterseqscommand.cpp
5 * Created by Thomas Ryabin on 5/4/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "filterseqscommand.h"
12 /**************************************************************************************/
14 FilterSeqsCommand::FilterSeqsCommand(){
15 globaldata = GlobalData::getInstance();
17 if(globaldata->getFastaFile() == "") { cout << "You must enter a fasta formatted file" << endl; }
18 trump = globaldata->getTrump()[0];
21 // db = readSeqs->getDB();
22 // numSeqs = db->size();
24 // alignmentLength = db->get(0).getAlignLength();
26 // filter = string(alignmentLength, '1');
29 /**************************************************************************************/
31 void FilterSeqsCommand::doHard() {
33 // string hardName = globaldata->getHard();
34 // string hardFilter = "";
36 // ifstream fileHandle;
37 // openInputFile(hardName, fileHandle);
39 // fileHandle >> hardFilter;
41 // if(hardFilter.length() != filter.length()){
42 // cout << "The hard filter is not the same length as the alignment: Hard filter will not be applied." << endl;
45 // filter = hardFilter;
50 /**************************************************************************************/
52 void FilterSeqsCommand::doTrump() {
55 for(int i = 0; i < numSeqs; i++) {
56 string curAligned = db->get(i).getAligned();;
58 for(int j = 0; j < alignmentLength; j++) {
59 if(curAligned[j] == trump){
67 /**************************************************************************************/
69 void FilterSeqsCommand::doVertical() {
71 // vector<int> counts(alignmentLength, 0);
73 // for(int i = 0; i < numSeqs; i++) {
74 // string curAligned = db->get(i).getAligned();;
76 // for(int j = 0; j < alignmentLength; j++) {
77 // if(curAligned[j] == '-' || curAligned[j] == '.'){
82 // for(int i=0;i<alignmentLength;i++){
83 // if(counts[i] == numSeqs) { filter[i] = '0'; }
87 /**************************************************************************************/
89 void FilterSeqsCommand::doSoft() {
91 // int softThreshold = numSeqs * (float)atoi(globaldata->getSoft().c_str()) / 100.0;
93 // vector<int> a(alignmentLength, 0);
94 // vector<int> t(alignmentLength, 0);
95 // vector<int> g(alignmentLength, 0);
96 // vector<int> c(alignmentLength, 0);
97 // vector<int> x(alignmentLength, 0);
99 // for(int i=0;i<numSeqs;i++){
100 // string curAligned = db->get(i).getAligned();;
102 // for(int j=0;j<alignmentLength;j++){
103 // if(toupper(curAligned[j]) == 'A') { a[j]++; }
104 // else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[i]) == 'U') { t[j]++; }
105 // else if(toupper(curAligned[j]) == 'G') { g[j]++; }
106 // else if(toupper(curAligned[j]) == 'C') { c[j]++; }
110 // for(int i=0;i<alignmentLength;i++){
111 // if(a[i] < softThreshold && t[i] < softThreshold && g[i] < softThreshold && c[i] < softThreshold){
117 /**************************************************************************************/
119 int FilterSeqsCommand::execute() {
122 openInputFile(globaldata->getFastaFile(), inFASTA);
124 Sequence currSequence(inFASTA);
125 alignmentLength = currSequence.getAlignLength();
130 if(globaldata->getHard().compare("") != 0) { doHard(); } // has to be applied first!
131 if(globaldata->getTrump().compare("") != 0) { doTrump(); }
132 if(isTrue(globaldata->getVertical()) == true) { doVertical(); }
133 if(globaldata->getSoft().compare("") != 0) { doSoft(); }
136 string filterFile = getRootName(globaldata->inputFileName) + "filter";
137 openOutputFile(filterFile, outfile);
139 outfile << filter << endl;
142 string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta";
143 openOutputFile(filteredFasta, outfile);
145 for(int i=0;i<numSeqs;i++){
146 string curAligned = db->get(i).getAligned();
147 outfile << '>' << db->get(i).getName() << endl;
148 for(int j=0;j<alignmentLength;j++){
149 if(filter[j] == '1'){
150 outfile << curAligned[j];
157 int filteredLength = 0;
158 for(int i=0;i<alignmentLength;i++){
159 if(filter[i] == '1'){ filteredLength++; }
163 cout << "Length of filtered alignment: " << filteredLength << endl;
164 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
165 cout << "Length of the original alignment: " << alignmentLength << endl;
166 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
173 catch(exception& e) {
174 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
183 /**************************************************************************************/