From a5a908953ab2ebb9386a895e5ddddf0135ad1f99 Mon Sep 17 00:00:00 2001 From: ryabin Date: Fri, 8 May 2009 16:26:50 +0000 Subject: [PATCH] filterseqscommand added --- filterseqscommand.cpp | 229 +++++++++++++++++++++--------------------- globaldata.hpp | 2 + readseqscommand.cpp | 30 +++++- sequence.cpp | 2 +- sequence.hpp | 2 +- sequencedb.cpp | 2 +- sequencedb.h | 2 +- 7 files changed, 143 insertions(+), 126 deletions(-) diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp index aff959a..f92253d 100644 --- a/filterseqscommand.cpp +++ b/filterseqscommand.cpp @@ -13,147 +13,142 @@ /**************************************************************************************/ void FilterSeqsCommand::doTrump() { - //trump = globaldata->getTrump(); -// -// for(int i = 0; i < db->size(); i++) { -// Sequence cur = db->get(i); -// string curAligned = cur.getAligned(); -// -// for(int j = 0; j < curAligned.length-1; j++) { -// string curChar = curAligned.substr(j, j+1); -// -// if(curChar.compare(trump) == 0) -// columnsToRemove[j] = true; -// } -// } + trump = globaldata->getTrump(); + for(int i = 0; i < db->size(); i++) { + Sequence cur = db->get(i); + string curAligned = cur.getAligned(); + for(int j = 0; j < curAligned.length(); j++) { + string curChar = curAligned.substr(j, 1); + if(curChar.compare(trump) == 0) + columnsToRemove[j] = true; + } + } } /**************************************************************************************/ void FilterSeqsCommand::doSoft() { - //soft = atoi(globaldata->getSoft().c_str()); -// vector > columnSymbolSums; -// vector > columnSymbols; -// for(int i = 0; i < db->get(0).getLength(); i++) { -// vector symbols; -// vector sums; -// columnSymbols[i] = symbols; -// columnSymbolSums[i] = sums; -// } -// -// for(int i = 0; i < db->size(); i++) { -// Sequence cur = db->get(i); -// string curAligned = cur.getAligned(); -// -// for(int j = 0; j < curAligned.length-1; j++) { -// string curChar = curAligned.substr(j, j+1); -// vector curColumnSymbols = columnSymbols[j]; -// -// bool newSymbol = true; -// -// for(int k = 0; j < curColumnSymbols.size(); j++) -// if(curChar.compare(curColumnSymbols[k]) == 0) { -// newSymbol = false; -// columnSymbolSums[j][k]++; -// } -// -// if(newSymbol) { -// columnSymbols.push_back(curChar); -// columnSymbolSums[j].push_back(1); -// } -// } -// } -// -// for(int i = 0; i < columnSymbolSums.size(); i++) { -// int totalSum = 0; -// int max = 0; -// vector curColumn = columnSymbolSums[i]; -// -// for(int j = 0; j < curColumn.size(); j++) { -// int curSum = curColumn[j]; -// if(curSum > max) -// max = curSum; -// totalSum += curSum; -// } -// -// if((double)max/(double)totalSum * 100 < soft) -// columnsToRemove[i] = true; -// } -} -void FilterSeqsCommand::doFilter() {} -/**************************************************************************************/ -int FilterSeqsCommand::execute() { - try { - globaldata = GlobalData::getInstance(); - filename = globaldata->inputFileName; + soft = atoi(globaldata->getSoft().c_str()); + vector > columnSymbolSums; + vector > columnSymbols; + for(int i = 0; i < db->get(0).getLength(); i++) { + vector symbols; + vector sums; + columnSymbols.push_back(symbols); + columnSymbolSums.push_back(sums); + } + + for(int i = 0; i < db->size(); i++) { + Sequence cur = db->get(i); + string curAligned = cur.getAligned(); - if(globaldata->getFastaFile().compare("") != 0) { - readFasta = new ReadFasta(filename); - readFasta->read(); - db = readFasta->getDB(); + for(int j = 0; j < curAligned.length(); j++) { + string curChar = curAligned.substr(j, 1); + vector curColumnSymbols = columnSymbols[j]; + bool newSymbol = true; + + for(int k = 0; k < curColumnSymbols.size(); k++) + if(curChar.compare(curColumnSymbols[k]) == 0) { + newSymbol = false; + columnSymbolSums[j][k]++; + } + + if(newSymbol) { + columnSymbols[j].push_back(curChar); + columnSymbolSums[j].push_back(1); + } } + } + + + for(int i = 0; i < columnSymbolSums.size(); i++) { + int totalSum = 0; + int max = 0; + vector curColumnSymbols = columnSymbolSums[i]; - else if(globaldata->getNexusFile().compare("") != 0) { - readNexus = new ReadNexus(filename); - readNexus->read(); - db = readNexus->getDB(); + for(int j = 0; j < curColumnSymbols.size(); j++) { + int curSum = curColumnSymbols[j]; + //cout << columnSymbols[i][j] << ": " << curSum << "\n"; + if(curSum > max) + max = curSum; + totalSum += curSum; } + //cout << "\n"; - else if(globaldata->getClustalFile().compare("") != 0) { - readClustal = new ReadClustal(filename); - readClustal->read(); - db = readClustal->getDB(); - } + if((double)max/(double)totalSum * 100 < soft) + columnsToRemove[i] = true; + } +} - else if(globaldata->getPhylipFile().compare("") != 0) { - readPhylip = new ReadPhylip(filename); - readPhylip->read(); - db = readPhylip->getDB(); - } +/**************************************************************************************/ +void FilterSeqsCommand::doFilter() { + filter = globaldata->getFilter(); + ifstream filehandle; + openInputFile(filter, filehandle); - for(int i = 0; i < db->get(0).getLength(); i++) - columnsToRemove[i] = false; - - // Trump - if(globaldata->getTrump().compare("") != 0) { - - - } - - // Soft - if(globaldata->getSoft().compare("") != 0) {} + char c; + int count = 0; + while(!filehandle.eof()) { + c = filehandle.get(); + if(c == '0') + columnsToRemove[count] = true; + count++; + } +} +/**************************************************************************************/ +int FilterSeqsCommand::execute() { + try { + globaldata = GlobalData::getInstance(); + db = globaldata->gSequenceDB; + + for(int i = 0; i < db->get(0).getLength(); i++) + columnsToRemove.push_back(false); + + if(globaldata->getTrump().compare("") != 0) + doTrump(); + else if(globaldata->getSoft().compare("") != 0) + doSoft(); + else if(globaldata->getFilter().compare("") != 0) + doFilter(); - // Filter - //if(globaldata->getFilter().compare("") != 0) { -// -// filter = globaldata->getFilter(); -// ifstream filehandle; -// openInputFile(filter, filehandle); -// -// char c; -// int count = 0; -// while(!filehandle.eof()) { -// c = filehandle.get(); -// if(c == '0') -// columnsToRemove[count] = true; -// count++; -// } + //for(int i = 0; i < columnsToRemove.size(); i++) +// { +// cout << "Remove Column " << i << " = "; +// if(columnsToRemove[i]) +// cout << "true\n"; +// else +// cout << "false\n"; // } - - - + //Creating the new SequenceDB + SequenceDB newDB; + for(int i = 0; i < db->size(); i++) { + Sequence curSeq = db->get(i); + string curAligned = curSeq.getAligned(); + string curName = curSeq.getName(); + string newAligned = ""; + for(int j = 0; j < curAligned.length(); j++) + if(!columnsToRemove[j]) + newAligned += curAligned.substr(j, 1); + Sequence newSeq(curName, newAligned); + newDB.add(newSeq); + } + + ofstream outfile; + outfile.open("filtertest.txt"); + newDB.print(outfile); + outfile.close(); return 0; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the DeconvoluteCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the DeconvoluteCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/globaldata.hpp b/globaldata.hpp index 40b8758..aa68dc2 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -23,6 +23,7 @@ class GroupMap; class TreeMap; class SAbundVector; class RAbundVector; +class SequenceDB; class GlobalData { public: @@ -39,6 +40,7 @@ public: GroupMap* gGroupmap; FullMatrix* gMatrix; TreeMap* gTreemap; + SequenceDB* gSequenceDB; string inputFileName, helpRequest, commandName, vertical; bool allLines; vector Estimators, Groups; //holds estimators to be used diff --git a/readseqscommand.cpp b/readseqscommand.cpp index 919855b..2ca7567 100644 --- a/readseqscommand.cpp +++ b/readseqscommand.cpp @@ -13,14 +13,34 @@ ReadSeqsCommand::ReadSeqsCommand(){ try { globaldata = GlobalData::getInstance(); - + filename = globaldata->inputFileName; + if(globaldata->getFastaFile().compare("") != 0) { + readFasta = new ReadFasta(filename); + readFasta->read(); + globaldata->gSequenceDB = readFasta->getDB(); + } + else if(globaldata->getNexusFile().compare("") != 0) { + readNexus = new ReadNexus(filename); + readNexus->read(); + globaldata->gSequenceDB = readNexus->getDB(); + } + else if(globaldata->getClustalFile().compare("") != 0) { + readClustal = new ReadClustal(filename); + readClustal->read(); + globaldata->gSequenceDB = readClustal->getDB(); + } + else if(globaldata->getPhylipFile().compare("") != 0) { + readPhylip = new ReadPhylip(filename); + readPhylip->read(); + globaldata->gSequenceDB = readPhylip->getDB(); + } } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the ReadSeqsCommand class Function ReadSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the ReadOtuCommand class function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the ReadSeqsCommand class function ReadSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } @@ -41,7 +61,7 @@ int ReadSeqsCommand::execute(){ filebuf fb; //fb.open ("fasta.txt",ios::out); -// readFasta->read(); + //readFasta->read(); // SequenceDB* db = readFasta->getDB(); //fb.open("nexus.txt",ios::out); @@ -59,7 +79,7 @@ int ReadSeqsCommand::execute(){ //for(int i = 0; i < db->size(); i++) { -// cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getUnaligned() << "\n\n"; +// cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getAligned() << "\n\n"; // } //ostream os(&fb); diff --git a/sequence.cpp b/sequence.cpp index b59363e..5b3b01d 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -114,7 +114,7 @@ int Sequence::getLength(){ //******************************************************************************************************************** -void Sequence::printSequence(ostream& out){ +void Sequence::printSequence(ofstream& out){ string toPrint = unaligned; if(aligned.length() > unaligned.length()) toPrint = aligned; diff --git a/sequence.hpp b/sequence.hpp index 03cbab7..dea06bd 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -33,7 +33,7 @@ public: string getPairwise(); string getUnaligned(); int getLength(); - void printSequence(ostream&); + void printSequence(ofstream&); private: string name; diff --git a/sequencedb.cpp b/sequencedb.cpp index e8aade7..1f81ba8 100644 --- a/sequencedb.cpp +++ b/sequencedb.cpp @@ -72,7 +72,7 @@ int SequenceDB::size() { /***********************************************************************/ -void SequenceDB::print(ostream& out) { +void SequenceDB::print(ofstream& out) { for(int i = 0; i < data.size(); i++) data[i].printSequence(out); } diff --git a/sequencedb.h b/sequencedb.h index f31fc32..35636bf 100644 --- a/sequencedb.h +++ b/sequencedb.h @@ -36,7 +36,7 @@ public: void changeSize(int); //resizes data void clear(); //clears data - remeber to loop through and delete the sequences inside or you will have a memory leak int size(); //returns datas size - void print(ostream&); //loops through data using sequence class print + void print(ofstream&); //loops through data using sequence class print private: vector data; -- 2.39.2