2 * filterseqscommand.cpp
5 * Created by Thomas Ryabin on 5/4/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "filterseqscommand.h"
12 /**************************************************************************************/
14 FilterSeqsCommand::FilterSeqsCommand(){
15 globaldata = GlobalData::getInstance();
17 if(globaldata->getFastaFile() != "") { readSeqs = new ReadFasta(globaldata->inputFileName); }
18 else if(globaldata->getNexusFile() != "") { readSeqs = new ReadNexus(globaldata->inputFileName); }
19 else if(globaldata->getClustalFile() != "") { readSeqs = new ReadClustal(globaldata->inputFileName); }
20 else if(globaldata->getPhylipFile() != "") { readSeqs = new ReadPhylip(globaldata->inputFileName); }
23 db = readSeqs->getDB();
26 alignmentLength = db->get(0).getAlignLength();
28 filter = string(alignmentLength, '1');
31 /**************************************************************************************/
33 void FilterSeqsCommand::doHard() {
35 string hardName = globaldata->getHard();
36 string hardFilter = "";
39 openInputFile(hardName, fileHandle);
41 fileHandle >> hardFilter;
43 if(hardFilter.length() != filter.length()){
44 cout << "The hard filter is not the same length as the alignment: Hard filter will not be applied." << endl;
52 /**************************************************************************************/
54 void FilterSeqsCommand::doTrump() {
56 char trump = globaldata->getTrump()[0];
58 for(int i = 0; i < numSeqs; i++) {
59 string curAligned = db->get(i).getAligned();;
61 for(int j = 0; j < alignmentLength; j++) {
62 if(curAligned[j] == trump){
70 /**************************************************************************************/
72 void FilterSeqsCommand::doVertical() {
74 vector<int> counts(alignmentLength, 0);
76 for(int i = 0; i < numSeqs; i++) {
77 string curAligned = db->get(i).getAligned();;
79 for(int j = 0; j < alignmentLength; j++) {
80 if(curAligned[j] == '-' || curAligned[j] == '.'){
85 for(int i=0;i<alignmentLength;i++){
86 if(counts[i] == numSeqs) { filter[i] = '0'; }
90 /**************************************************************************************/
92 void FilterSeqsCommand::doSoft() {
94 int softThreshold = numSeqs * (float)atoi(globaldata->getSoft().c_str()) / 100.0;
96 vector<int> a(alignmentLength, 0);
97 vector<int> t(alignmentLength, 0);
98 vector<int> g(alignmentLength, 0);
99 vector<int> c(alignmentLength, 0);
100 vector<int> x(alignmentLength, 0);
102 for(int i=0;i<numSeqs;i++){
103 string curAligned = db->get(i).getAligned();;
105 for(int j=0;j<alignmentLength;j++){
106 if(toupper(curAligned[j]) == 'A') { a[j]++; }
107 else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[i]) == 'U') { t[j]++; }
108 else if(toupper(curAligned[j]) == 'G') { g[j]++; }
109 else if(toupper(curAligned[j]) == 'C') { c[j]++; }
113 for(int i=0;i<alignmentLength;i++){
114 if(a[i] < softThreshold && t[i] < softThreshold && g[i] < softThreshold && c[i] < softThreshold){
120 /**************************************************************************************/
122 int FilterSeqsCommand::execute() {
125 if(globaldata->getHard().compare("") != 0) { doHard(); } // has to be applied first!
126 if(globaldata->getTrump().compare("") != 0) { doTrump(); }
127 if(globaldata->getVertical() == "T") { doVertical(); }
128 if(globaldata->getSoft().compare("") != 0) { doSoft(); }
131 string filterFile = getRootName(globaldata->inputFileName) + "filter";
132 openOutputFile(filterFile, outfile);
134 outfile << filter << endl;
137 string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta";
138 openOutputFile(filteredFasta, outfile);
140 for(int i=0;i<numSeqs;i++){
141 string curAligned = db->get(i).getAligned();
142 outfile << '>' << db->get(i).getName() << endl;
143 for(int j=0;j<alignmentLength;j++){
144 if(filter[j] == '1'){
145 outfile << curAligned[j];
152 int filteredLength = 0;
153 for(int i=0;i<alignmentLength;i++){
154 if(filter[i] == '1'){ filteredLength++; }
158 cout << "Length of filtered alignment: " << filteredLength << endl;
159 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
160 cout << "Length of the original alignment: " << alignmentLength << endl;
161 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
168 catch(exception& e) {
169 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
173 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178 /**************************************************************************************/