2 * filterseqscommand.cpp
5 * Created by Thomas Ryabin on 5/4/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "filterseqscommand.h"
12 /**************************************************************************************/
14 FilterSeqsCommand::FilterSeqsCommand(){
16 globaldata = GlobalData::getInstance();
18 if(globaldata->getFastaFile() == "") { cout << "You must enter a fasta formatted file" << endl; }
19 trump = globaldata->getTrump()[0];
24 /**************************************************************************************/
26 void FilterSeqsCommand::doHard() {
28 string hardName = globaldata->getHard();
29 string hardFilter = "";
32 openInputFile(hardName, fileHandle);
38 /**************************************************************************************/
40 void FilterSeqsCommand::doTrump(Sequence seq) {
42 string curAligned = seq.getAligned();
44 for(int j = 0; j < alignmentLength; j++) {
45 if(curAligned[j] == trump){
52 /**************************************************************************************/
54 void FilterSeqsCommand::doVertical() {
56 for(int i=0;i<alignmentLength;i++){
57 if(gap[i] == numSeqs) { filter[i] = '0'; }
62 /**************************************************************************************/
64 void FilterSeqsCommand::doSoft() {
66 int threshold = int (soft * numSeqs);
69 for(int i=0;i<alignmentLength;i++){
70 if(a[i] >= threshold) { keep = 1; }
71 else if(t[i] >= threshold) { keep = 1; }
72 else if(g[i] >= threshold) { keep = 1; }
73 else if(c[i] >= threshold) { keep = 1; }
75 if(keep == 0) { filter[i] = 0; }
79 /**************************************************************************************/
81 void FilterSeqsCommand::getFreqs(Sequence seq) {
83 string curAligned = seq.getAligned();;
85 for(int j=0;j<alignmentLength;j++){
86 if(toupper(curAligned[j]) == 'A') { a[j]++; }
87 else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[j]) == 'U') { t[j]++; }
88 else if(toupper(curAligned[j]) == 'G') { g[j]++; }
89 else if(toupper(curAligned[j]) == 'C') { c[j]++; }
90 else if(curAligned[j] == '-' || curAligned[j] == '.') { gap[j]++; }
95 /**************************************************************************************/
97 int FilterSeqsCommand::execute() {
100 openInputFile(globaldata->getFastaFile(), inFASTA);
102 Sequence testSeq(inFASTA);
103 alignmentLength = testSeq.getAlignLength();
106 if(globaldata->getSoft() != "" || isTrue(globaldata->getVertical())){
107 a.assign(alignmentLength, 0);
108 t.assign(alignmentLength, 0);
109 g.assign(alignmentLength, 0);
110 c.assign(alignmentLength, 0);
111 gap.assign(alignmentLength, 0);
113 if(globaldata->getSoft() != ""){
114 soft = (float)atoi(globaldata->getSoft().c_str()) / 100.0;
117 if(globaldata->getHard().compare("") != 0) { doHard(); }
118 else { filter = string(alignmentLength, '1'); }
120 if(globaldata->getTrump().compare("") != 0 || isTrue(globaldata->getVertical()) || globaldata->getSoft().compare("") != 0){
122 while(!inFASTA.eof()){
123 Sequence seq(inFASTA);
124 if(globaldata->getTrump().compare("") != 0) { doTrump(seq); }
125 if(isTrue(globaldata->getVertical()) || globaldata->getSoft().compare("") != 0){ getFreqs(seq); }
133 if(isTrue(globaldata->getVertical()) == 1) { doVertical(); }
134 if(globaldata->getSoft().compare("") != 0) { doSoft(); }
137 string filterFile = getRootName(globaldata->inputFileName) + "filter";
138 openOutputFile(filterFile, outFilter);
139 outFilter << filter << endl;
143 openInputFile(globaldata->getFastaFile(), inFASTA);
144 string filteredFasta = getRootName(globaldata->inputFileName) + "filter.fasta";
146 openOutputFile(filteredFasta, outFASTA);
149 while(!inFASTA.eof()){
150 Sequence seq(inFASTA);
151 string align = seq.getAligned();
152 string filterSeq = "";
154 for(int j=0;j<alignmentLength;j++){
155 if(filter[j] == '1'){
156 filterSeq += align[j];
160 outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
168 int filteredLength = 0;
169 for(int i=0;i<alignmentLength;i++){
170 if(filter[i] == '1'){ filteredLength++; }
174 cout << "Length of filtered alignment: " << filteredLength << endl;
175 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
176 cout << "Length of the original alignment: " << alignmentLength << endl;
177 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
184 catch(exception& e) {
185 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
189 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
194 /**************************************************************************************/