2 * filterseqscommand.cpp
5 * Created by Thomas Ryabin on 5/4/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "filterseqscommand.h"
12 /**************************************************************************************/
14 FilterSeqsCommand::FilterSeqsCommand(string option){
16 globaldata = GlobalData::getInstance();
19 //allow user to run help
20 if(option == "help") { help(); abort = true; }
23 //valid paramters for this command
24 string Array[] = {"fasta", "trump", "soft", "hard", "vertical"};
25 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27 parser = new OptionParser();
28 parser->parse(option, parameters); delete parser;
30 ValidParameters* validParameter = new ValidParameters();
32 //check to make sure all parameters are valid for command
33 for (it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter->isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //check for required parameters
38 fastafile = validParameter->validFile(parameters, "fasta", true);
39 if (fastafile == "not found") { cout << "fasta is a required parameter for the filter.seqs command." << endl; abort = true; }
40 else if (fastafile == "not open") { abort = true; }
42 globaldata->setFastaFile(fastafile);
45 //check for optional parameter and set defaults
46 // ...at some point should added some additional type checking...
49 temp = validParameter->validFile(parameters, "trump", false); if (temp == "not found") { temp = "."; }
52 temp = validParameter->validFile(parameters, "soft", false); if (temp == "not found") { soft = 0; }
53 else { soft = (float)atoi(temp.c_str()) / 100.0; }
55 hard = validParameter->validFile(parameters, "hard", true); if (hard == "not found") { hard = ""; }
56 else if (hard == "not open") { abort = true; }
58 vertical = validParameter->validFile(parameters, "vertical", false); if (vertical == "not found") { vertical = "F"; }
60 delete validParameter;
68 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function FilterSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
72 cout << "An unknown error has occurred in the FilterSeqsCommand class function FilterSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
77 //**********************************************************************************************************************
79 void FilterSeqsCommand::help(){
81 cout << "The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file." << "\n";
82 cout << "The filter.seqs command parameters are fasta, trump, soft, hard and vertical. " << "\n";
83 cout << "The fasta parameter is required." << "\n";
84 cout << "The trump parameter .... The default is '.'" << "\n";
85 cout << "The soft parameter .... The default is ...." << "\n";
86 cout << "The hard parameter .... The default is ...." << "\n";
87 cout << "The vertical parameter .... The default is F." << "\n";
88 cout << "The filter.seqs command should be in the following format: " << "\n";
89 cout << "filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) " << "\n";
90 cout << "Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T)." << "\n";
91 cout << "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta)." << "\n" << "\n";
95 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
99 cout << "An unknown error has occurred in the FilterSeqsCommand class function help. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
104 /**************************************************************************************/
106 void FilterSeqsCommand::doHard() {
109 openInputFile(hard, fileHandle);
111 fileHandle >> filter;
115 /**************************************************************************************/
117 void FilterSeqsCommand::doTrump(Sequence seq) {
119 string curAligned = seq.getAligned();
121 for(int j = 0; j < alignmentLength; j++) {
122 if(curAligned[j] == trump){
129 /**************************************************************************************/
131 void FilterSeqsCommand::doVertical() {
133 for(int i=0;i<alignmentLength;i++){
134 if(gap[i] == numSeqs) { filter[i] = '0'; }
139 /**************************************************************************************/
141 void FilterSeqsCommand::doSoft() {
143 int threshold = int (soft * numSeqs);
146 for(int i=0;i<alignmentLength;i++){
147 if(a[i] >= threshold) { keep = 1; }
148 else if(t[i] >= threshold) { keep = 1; }
149 else if(g[i] >= threshold) { keep = 1; }
150 else if(c[i] >= threshold) { keep = 1; }
152 if(keep == 0) { filter[i] = 0; }
156 /**************************************************************************************/
158 void FilterSeqsCommand::getFreqs(Sequence seq) {
160 string curAligned = seq.getAligned();;
162 for(int j=0;j<alignmentLength;j++){
163 if(toupper(curAligned[j]) == 'A') { a[j]++; }
164 else if(toupper(curAligned[j]) == 'T' || toupper(curAligned[j]) == 'U') { t[j]++; }
165 else if(toupper(curAligned[j]) == 'G') { g[j]++; }
166 else if(toupper(curAligned[j]) == 'C') { c[j]++; }
167 else if(curAligned[j] == '-' || curAligned[j] == '.') { gap[j]++; }
172 /**************************************************************************************/
174 int FilterSeqsCommand::execute() {
177 if (abort == true) { return 0; }
180 openInputFile(fastafile, inFASTA);
182 Sequence testSeq(inFASTA);
183 alignmentLength = testSeq.getAlignLength();
186 if(soft != 0 || isTrue(vertical)){
187 a.assign(alignmentLength, 0);
188 t.assign(alignmentLength, 0);
189 g.assign(alignmentLength, 0);
190 c.assign(alignmentLength, 0);
191 gap.assign(alignmentLength, 0);
194 if(hard.compare("") != 0) { doHard(); }
195 else { filter = string(alignmentLength, '1'); }
197 if(isTrue(vertical) || soft != 0){
199 while(!inFASTA.eof()){
200 Sequence seq(inFASTA);
202 if(isTrue(vertical) || soft != 0){ getFreqs(seq); }
210 if(isTrue(vertical) == 1) { doVertical(); }
211 if(soft != 0) { doSoft(); }
214 string filterFile = getRootName(fastafile) + "filter";
215 openOutputFile(filterFile, outFilter);
216 outFilter << filter << endl;
220 openInputFile(fastafile, inFASTA);
221 string filteredFasta = getRootName(fastafile) + "filter.fasta";
223 openOutputFile(filteredFasta, outFASTA);
226 while(!inFASTA.eof()){
227 Sequence seq(inFASTA);
228 string align = seq.getAligned();
229 string filterSeq = "";
231 for(int j=0;j<alignmentLength;j++){
232 if(filter[j] == '1'){
233 filterSeq += align[j];
237 outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
245 int filteredLength = 0;
246 for(int i=0;i<alignmentLength;i++){
247 if(filter[i] == '1'){ filteredLength++; }
251 cout << "Length of filtered alignment: " << filteredLength << endl;
252 cout << "Number of columns removed: " << alignmentLength-filteredLength << endl;
253 cout << "Length of the original alignment: " << alignmentLength << endl;
254 cout << "Number of sequences used to construct filter: " << numSeqs << endl;
261 catch(exception& e) {
262 cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
266 cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
271 /**************************************************************************************/