2 * filterseqscommand.cpp
5 * Created by Thomas Ryabin on 5/4/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "filterseqscommand.h"
12 /**************************************************************************************/
14 FilterSeqsCommand::FilterSeqsCommand(){
15 globaldata = GlobalData::getInstance();
17 if(globaldata->getFastaFile() == "") { cout << "You must enter a fasta formatted file" << endl; }
18 trump = globaldata->getTrump()[0];
21 // db = readSeqs->getDB();
22 // numSeqs = db->size();
24 // alignmentLength = db->get(0).getAlignLength();
26 // filter = string(alignmentLength, '1');
29 /**************************************************************************************/
31 void FilterSeqsCommand::doHard() {
33 string hardName = globaldata->getHard();
34 string hardFilter = "";
37 openInputFile(hardName, fileHandle);
39 fileHandle >> hardFilter;
41 if(hardFilter.length() != filter.length()){
42 cout << "The hard filter is not the same length as the alignment: Hard filter will not be applied." << endl;
50 /**************************************************************************************/
52 void FilterSeqsCommand::doTrump(Sequence seq) {
55 // for(int i = 0; i < numSeqs; i++) {
56 // string curAligned = db->get(i).getAligned();;
58 // for(int j = 0; j < alignmentLength; j++) {
59 // if(curAligned[j] == trump){
67 /**************************************************************************************/
69 void FilterSeqsCommand::doVertical(Sequence seq) {
71 // vector<int> counts(alignmentLength, 0);
73 // for(int i = 0; i < numSeqs; i++) {
74 // string curAligned = db->get(i).getAligned();;
76 // for(int j = 0; j < alignmentLength; j++) {
77 // if(curAligned[j] == '-' || curAligned[j] == '.'){
82 // for(int i=0;i<alignmentLength;i++){
83 // if(counts[i] == numSeqs) { filter[i] = '0'; }
87 /**************************************************************************************/
89 void FilterSeqsCommand::doSoft(Sequence seq) {
91 // int softThreshold = numSeqs * (float)atoi(globaldata->getSoft().c_str()) / 100.0;
93 // vector<int> a(alignmentLength, 0);
94 // vector<int> t(alignmentLength, 0);
95 // vector<int> g(alignmentLength, 0);
96 // vector<int> c(alignmentLength, 0);
97 // vector<int> x(alignmentLength, 0);
99 // for(int i=0;i<numSeqs;i++){
100 // string curAligned = db->get(i).getAligned();;
102 // for(int j=0;j<alignmentLength;j++){
103 // if(toupper(curAligned[j]) == 'A') { a[j]++; }
104 // else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[i]) == 'U') { t[j]++; }
105 // else if(toupper(curAligned[j]) == 'G') { g[j]++; }
106 // else if(toupper(curAligned[j]) == 'C') { c[j]++; }
110 // for(int i=0;i<alignmentLength;i++){
111 // if(a[i] < softThreshold && t[i] < softThreshold && g[i] < softThreshold && c[i] < softThreshold){
117 /**************************************************************************************/
119 int FilterSeqsCommand::execute() {
122 openInputFile(globaldata->getFastaFile(), inFASTA);
124 Sequence testSeq(inFASTA);
125 alignmentLength = testSeq.getAlignLength();
129 if(globaldata->getHard().compare("") != 0) { doHard(); }
130 else { filter = string(alignmentLength, '1'); }
131 while(!inFASTA.eof()){
132 Sequence seq(inFASTA);
133 if(globaldata->getTrump().compare("") != 0) { doTrump(seq); }
134 if(isTrue(globaldata->getVertical()) == 1) { doVertical(seq); }
135 // if(globaldata->getSoft().compare("") != 0) { doSoft(seq); }
140 string filterFile = getRootName(globaldata->inputFileName) + "filter";
141 openOutputFile(filterFile, outfile);
143 outfile << filter << endl;
146 string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta";
147 openOutputFile(filteredFasta, outfile);
149 // for(int i=0;i<numSeqs;i++){
150 // string curAligned = db->get(i).getAligned();
151 // outfile << '>' << db->get(i).getName() << endl;
152 // for(int j=0;j<alignmentLength;j++){
153 // if(filter[j] == '1'){
154 // outfile << curAligned[j];
161 int filteredLength = 0;
162 for(int i=0;i<alignmentLength;i++){
163 if(filter[i] == '1'){ filteredLength++; }
167 cout << "Length of filtered alignment: " << filteredLength << endl;
168 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
169 cout << "Length of the original alignment: " << alignmentLength << endl;
170 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
177 catch(exception& e) {
178 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
182 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
187 /**************************************************************************************/