2 * filterseqscommand.cpp
5 * Created by Thomas Ryabin on 5/4/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "filterseqscommand.h"
12 /**************************************************************************************/
14 FilterSeqsCommand::FilterSeqsCommand(){
16 globaldata = GlobalData::getInstance();
18 if(globaldata->getFastaFile() == "") { cout << "You must enter a fasta formatted file" << endl; }
19 trump = globaldata->getTrump()[0];
24 /**************************************************************************************/
26 void FilterSeqsCommand::doHard() {
28 string hardName = globaldata->getHard();
29 string hardFilter = "";
32 openInputFile(hardName, fileHandle);
38 /**************************************************************************************/
40 void FilterSeqsCommand::doTrump(Sequence seq) {
42 string curAligned = seq.getAligned();
44 for(int j = 0; j < alignmentLength; j++) {
45 if(curAligned[j] == trump){
52 /**************************************************************************************/
54 void FilterSeqsCommand::doVertical() {
56 for(int i=0;i<alignmentLength;i++){
57 if(gap[i] == numSeqs) { filter[i] = '0'; }
62 /**************************************************************************************/
64 void FilterSeqsCommand::doSoft() {
66 int threshold = int (soft * numSeqs);
69 for(int i=0;i<alignmentLength;i++){
70 if(a[i] >= threshold) { keep = 1; }
71 else if(t[i] >= threshold) { keep = 1; }
72 else if(g[i] >= threshold) { keep = 1; }
73 else if(c[i] >= threshold) { keep = 1; }
75 if(keep == 0) { filter[i] = 0; }
80 /**************************************************************************************/
82 void FilterSeqsCommand::getFreqs(Sequence seq) {
84 string curAligned = seq.getAligned();;
86 for(int j=0;j<alignmentLength;j++){
87 if(toupper(curAligned[j]) == 'A') { a[j]++; }
88 else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[j]) == 'U') { t[j]++; }
89 else if(toupper(curAligned[j]) == 'G') { g[j]++; }
90 else if(toupper(curAligned[j]) == 'C') { c[j]++; }
91 else if(curAligned[j] == '-' || curAligned[j] == '.') { gap[j]++; }
96 /**************************************************************************************/
98 int FilterSeqsCommand::execute() {
101 openInputFile(globaldata->getFastaFile(), inFASTA);
103 Sequence testSeq(inFASTA);
104 alignmentLength = testSeq.getAlignLength();
107 if(globaldata->getSoft() != "" || isTrue(globaldata->getVertical())){
108 a.assign(alignmentLength, 0);
109 t.assign(alignmentLength, 0);
110 g.assign(alignmentLength, 0);
111 c.assign(alignmentLength, 0);
112 gap.assign(alignmentLength, 0);
114 if(globaldata->getSoft() != ""){
115 soft = (float)atoi(globaldata->getSoft().c_str()) / 100.0;
118 if(globaldata->getHard().compare("") != 0) { doHard(); }
119 else { filter = string(alignmentLength, '1'); }
121 if(globaldata->getTrump().compare("") != 0 || isTrue(globaldata->getVertical()) || globaldata->getSoft().compare("") != 0){
123 while(!inFASTA.eof()){
124 Sequence seq(inFASTA);
125 if(globaldata->getTrump().compare("") != 0) { doTrump(seq); }
126 if(isTrue(globaldata->getVertical()) || globaldata->getSoft().compare("") != 0){ getFreqs(seq); }
134 if(isTrue(globaldata->getVertical()) == 1) { doVertical(); }
135 if(globaldata->getSoft().compare("") != 0) { doSoft(); }
138 string filterFile = getRootName(globaldata->inputFileName) + "filter";
139 openOutputFile(filterFile, outFilter);
140 outFilter << filter << endl;
144 openInputFile(globaldata->getFastaFile(), inFASTA);
145 string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta";
147 openOutputFile(filteredFasta, outFASTA);
150 while(!inFASTA.eof()){
151 Sequence seq(inFASTA);
152 string align = seq.getAligned();
153 string filterSeq = "";
155 for(int j=0;j<alignmentLength;j++){
156 if(filter[j] == '1'){
157 filterSeq += align[j];
161 outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
169 int filteredLength = 0;
170 for(int i=0;i<alignmentLength;i++){
171 if(filter[i] == '1'){ filteredLength++; }
175 cout << "Length of filtered alignment: " << filteredLength << endl;
176 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
177 cout << "Length of the original alignment: " << alignmentLength << endl;
178 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
185 catch(exception& e) {
186 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
190 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
195 /**************************************************************************************/