2 * filterseqscommand.cpp
5 * Created by Thomas Ryabin on 5/4/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "filterseqscommand.h"
12 /**************************************************************************************/
14 FilterSeqsCommand::FilterSeqsCommand(){
15 globaldata = GlobalData::getInstance();
17 if(globaldata->getFastaFile() == "") { cout << "You must enter a fasta formatted file" << endl; }
18 trump = globaldata->getTrump()[0];
21 // db = readSeqs->getDB();
22 // numSeqs = db->size();
24 // alignmentLength = db->get(0).getAlignLength();
26 // filter = string(alignmentLength, '1');
29 /**************************************************************************************/
31 void FilterSeqsCommand::doHard() {
33 // string hardName = globaldata->getHard();
34 // string hardFilter = "";
36 // ifstream fileHandle;
37 // openInputFile(hardName, fileHandle);
39 // fileHandle >> hardFilter;
41 // if(hardFilter.length() != filter.length()){
42 // cout << "The hard filter is not the same length as the alignment: Hard filter will not be applied." << endl;
45 // filter = hardFilter;
50 /**************************************************************************************/
52 void FilterSeqsCommand::doTrump() {
55 for(int i = 0; i < numSeqs; i++) {
56 string curAligned = db->get(i).getAligned();;
58 for(int j = 0; j < alignmentLength; j++) {
59 if(curAligned[j] == trump){
67 /**************************************************************************************/
69 void FilterSeqsCommand::doVertical() {
71 // vector<int> counts(alignmentLength, 0);
73 // for(int i = 0; i < numSeqs; i++) {
74 // string curAligned = db->get(i).getAligned();;
76 // for(int j = 0; j < alignmentLength; j++) {
77 // if(curAligned[j] == '-' || curAligned[j] == '.'){
82 // for(int i=0;i<alignmentLength;i++){
83 // if(counts[i] == numSeqs) { filter[i] = '0'; }
87 /**************************************************************************************/
89 void FilterSeqsCommand::doSoft() {
91 // int softThreshold = numSeqs * (float)atoi(globaldata->getSoft().c_str()) / 100.0;
93 // vector<int> a(alignmentLength, 0);
94 // vector<int> t(alignmentLength, 0);
95 // vector<int> g(alignmentLength, 0);
96 // vector<int> c(alignmentLength, 0);
97 // vector<int> x(alignmentLength, 0);
99 // for(int i=0;i<numSeqs;i++){
100 // string curAligned = db->get(i).getAligned();;
102 // for(int j=0;j<alignmentLength;j++){
103 // if(toupper(curAligned[j]) == 'A') { a[j]++; }
104 // else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[i]) == 'U') { t[j]++; }
105 // else if(toupper(curAligned[j]) == 'G') { g[j]++; }
106 // else if(toupper(curAligned[j]) == 'C') { c[j]++; }
110 // for(int i=0;i<alignmentLength;i++){
111 // if(a[i] < softThreshold && t[i] < softThreshold && g[i] < softThreshold && c[i] < softThreshold){
117 /**************************************************************************************/
119 int FilterSeqsCommand::execute() {
123 openInputFile(globaldata->getFastaFile(), inFASTA);
125 Sequence currSequence(inFASTA);
126 alignmentLength = currSequence.getAlignLength();
131 if(globaldata->getHard().compare("") != 0) { doHard(); } // has to be applied first!
132 if(globaldata->getTrump().compare("") != 0) { doTrump(); }
133 if(isTrue(globaldata->getVertical())) { doVertical(); }
134 if(globaldata->getSoft().compare("") != 0) { doSoft(); }
143 string filterFile = getRootName(globaldata->inputFileName) + "filter";
144 openOutputFile(filterFile, outfile);
146 outfile << filter << endl;
149 string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta";
150 openOutputFile(filteredFasta, outfile);
152 for(int i=0;i<numSeqs;i++){
153 string curAligned = db->get(i).getAligned();
154 outfile << '>' << db->get(i).getName() << endl;
155 for(int j=0;j<alignmentLength;j++){
156 if(filter[j] == '1'){
157 outfile << curAligned[j];
164 int filteredLength = 0;
165 for(int i=0;i<alignmentLength;i++){
166 if(filter[i] == '1'){ filteredLength++; }
170 cout << "Length of filtered alignment: " << filteredLength << endl;
171 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
172 cout << "Length of the original alignment: " << alignmentLength << endl;
173 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
180 catch(exception& e) {
181 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
185 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
190 /**************************************************************************************/