]> git.donarmstrong.com Git - mothur.git/commitdiff
filterseqscommand added
authorryabin <ryabin>
Fri, 8 May 2009 16:26:50 +0000 (16:26 +0000)
committerryabin <ryabin>
Fri, 8 May 2009 16:26:50 +0000 (16:26 +0000)
filterseqscommand.cpp
globaldata.hpp
readseqscommand.cpp
sequence.cpp
sequence.hpp
sequencedb.cpp
sequencedb.h

index aff959ab9359ef0d199ac785d8c43a942ea59255..f92253d4b39d8cd444d60018843b70a27187e676 100644 (file)
 
 /**************************************************************************************/
 void FilterSeqsCommand::doTrump() {
-       //trump = globaldata->getTrump();
-//     
-//     for(int i = 0; i < db->size(); i++) {
-//             Sequence cur = db->get(i);
-//             string curAligned = cur.getAligned();
-//             
-//             for(int j = 0; j < curAligned.length-1; j++) {
-//                     string curChar = curAligned.substr(j, j+1);
-//                     
-//                     if(curChar.compare(trump) == 0) 
-//                             columnsToRemove[j] = true;
-//             }
-//     }
+       trump = globaldata->getTrump();
+       for(int i = 0; i < db->size(); i++) {
+               Sequence cur = db->get(i);
+               string curAligned = cur.getAligned();
+               for(int j = 0; j < curAligned.length(); j++) {
+                       string curChar = curAligned.substr(j, 1);
+                       if(curChar.compare(trump) == 0) 
+                               columnsToRemove[j] = true;
+               }
+       }
 }
 
 /**************************************************************************************/
 void FilterSeqsCommand::doSoft() {
-       //soft = atoi(globaldata->getSoft().c_str());
-//     vector<vector<int> > columnSymbolSums;
-//     vector<vector<string> > columnSymbols;
-//     for(int i = 0; i < db->get(0).getLength(); i++) {
-//             vector<string> symbols;
-//             vector<int> sums;
-//             columnSymbols[i] = symbols;
-//             columnSymbolSums[i] = sums;
-//     }
-//     
-//     for(int i = 0; i < db->size(); i++) {
-//             Sequence cur = db->get(i);
-//             string curAligned = cur.getAligned();
-//             
-//             for(int j = 0; j < curAligned.length-1; j++) {
-//                     string curChar = curAligned.substr(j, j+1);
-//                     vector<string> curColumnSymbols = columnSymbols[j];
-//                     
-//                     bool newSymbol = true;
-//                     
-//                     for(int k = 0; j < curColumnSymbols.size(); j++) 
-//                             if(curChar.compare(curColumnSymbols[k]) == 0) {
-//                                     newSymbol = false;
-//                                     columnSymbolSums[j][k]++;
-//                             }
-//                     
-//                     if(newSymbol) {
-//                             columnSymbols.push_back(curChar);
-//                             columnSymbolSums[j].push_back(1);
-//                     }
-//             }
-//     }
-//     
-//     for(int i = 0; i < columnSymbolSums.size(); i++) {
-//             int totalSum = 0;
-//             int max = 0;
-//             vector<int> curColumn = columnSymbolSums[i];
-//             
-//             for(int j = 0; j < curColumn.size(); j++) {
-//                     int curSum = curColumn[j];
-//                     if(curSum > max)
-//                             max = curSum;
-//                     totalSum += curSum;
-//             }
-//             
-//             if((double)max/(double)totalSum * 100 < soft)
-//                     columnsToRemove[i] = true;
-//     }
-}
-void FilterSeqsCommand::doFilter() {}
-/**************************************************************************************/
-int FilterSeqsCommand::execute() {     
-       try {
-               globaldata = GlobalData::getInstance();
-               filename = globaldata->inputFileName;
+       soft = atoi(globaldata->getSoft().c_str());
+       vector<vector<int> > columnSymbolSums;
+       vector<vector<string> > columnSymbols;
+       for(int i = 0; i < db->get(0).getLength(); i++) {
+               vector<string> symbols;
+               vector<int> sums;
+               columnSymbols.push_back(symbols);
+               columnSymbolSums.push_back(sums);
+       }
+       
+       for(int i = 0; i < db->size(); i++) {
+               Sequence cur = db->get(i);
+               string curAligned = cur.getAligned();
                
-               if(globaldata->getFastaFile().compare("") != 0) {
-                       readFasta = new ReadFasta(filename);
-                       readFasta->read();
-                       db = readFasta->getDB();
+               for(int j = 0; j < curAligned.length(); j++) {
+                       string curChar = curAligned.substr(j, 1);
+                       vector<string> curColumnSymbols = columnSymbols[j];
+                       bool newSymbol = true;
+                       
+                       for(int k = 0; k < curColumnSymbols.size(); k++) 
+                               if(curChar.compare(curColumnSymbols[k]) == 0) {
+                                       newSymbol = false;
+                                       columnSymbolSums[j][k]++;
+                               }
+                       
+                       if(newSymbol) {
+                               columnSymbols[j].push_back(curChar);
+                               columnSymbolSums[j].push_back(1);
+                       }
                }
+       }
+       
+       
+       for(int i = 0; i < columnSymbolSums.size(); i++) {
+               int totalSum = 0;
+               int max = 0;
+               vector<int> curColumnSymbols = columnSymbolSums[i];
                
-               else if(globaldata->getNexusFile().compare("") != 0) {
-                       readNexus = new ReadNexus(filename);
-                       readNexus->read();
-                       db = readNexus->getDB();
+               for(int j = 0; j < curColumnSymbols.size(); j++) {
+                       int curSum = curColumnSymbols[j];
+                       //cout << columnSymbols[i][j] << ": " << curSum << "\n";
+                       if(curSum > max)
+                               max = curSum;
+                       totalSum += curSum;
                }
+               //cout << "\n";
                
-               else if(globaldata->getClustalFile().compare("") != 0) {
-                       readClustal = new ReadClustal(filename);
-                       readClustal->read();
-                       db = readClustal->getDB();
-               }
+               if((double)max/(double)totalSum * 100 < soft)
+                       columnsToRemove[i] = true;
+       }
+}
 
-               else if(globaldata->getPhylipFile().compare("") != 0) {
-                       readPhylip = new ReadPhylip(filename);
-                       readPhylip->read();
-                       db = readPhylip->getDB();
-               }
+/**************************************************************************************/
+void FilterSeqsCommand::doFilter() {
+       // File filter: reads the file named by globaldata->getFilter() one character
+       // at a time and flags column N for removal when the N-th character is '0'.
+       filter = globaldata->getFilter();
+       ifstream filehandle;
+       openInputFile(filter, filehandle);
        
-               for(int i = 0; i < db->get(0).getLength(); i++) 
-                       columnsToRemove[i] = false;
-                       
-               // Trump
-               if(globaldata->getTrump().compare("") != 0) {
-               
-                       
-               }
-               
-               // Soft
-               if(globaldata->getSoft().compare("") != 0) {}
+       // Loop on the result of get() rather than on eof(): a stream that failed
+       // without reaching end-of-file never sets eof(), which made the old loop
+       // spin forever, and eof() also let one bogus end-of-file value through.
+       // The size bound stops a filter longer than the alignment from writing
+       // past the end of columnsToRemove.
+       // NOTE(review): every character, including any line break, advances the
+       // column index - confirm filter files are one unbroken line of flags.
+       char c;
+       size_t count = 0;
+       while(count < columnsToRemove.size() && filehandle.get(c)) {
+               if(c == '0') 
+                       columnsToRemove[count] = true;
+               count++;
+       }
+}
 
+/**************************************************************************************/
+int FilterSeqsCommand::execute() {     
+       try {
+               // Builds the column-removal mask with one of the three filters, writes
+               // the filtered sequences to "filtertest.txt" and returns 0.
+               globaldata = GlobalData::getInstance();
+               db = globaldata->gSequenceDB;
+               
+               // The mask below is sized from the first sequence, so there must be a
+               // database holding at least one sequence before anything else is touched.
+               if(db == 0 || db->size() == 0) {
+                       cout << "There are no sequences to filter. Please read in a sequence file first." << "\n";
+                       return 0;
+               }
+               
+               // assign() resets the mask as well as sizing it; appending with push_back
+               // left flags from an earlier run in place if execute() ran twice.
+               columnsToRemove.assign(db->get(0).getLength(), false);
                 
+                               
+               // The filters are mutually exclusive: only the first option that is set
+               // (trump, then soft, then filter) is applied.
+               if(globaldata->getTrump().compare("") != 0) 
+                       doTrump();
+               else if(globaldata->getSoft().compare("") != 0)
+                       doSoft();
                         
+               else if(globaldata->getFilter().compare("") != 0) 
+                       doFilter();
                 
-               // Filter
-               //if(globaldata->getFilter().compare("") != 0) {
-//
-//                     filter = globaldata->getFilter();
-//                     ifstream filehandle;
-//                     openInputFile(filter, filehandle);
-//                     
-//                     char c;
-//                     int count = 0;
-//                     while(!filehandle.eof()) {
-//                             c = filehandle.get();
-//                             if(c == '0') 
-//                                     columnsToRemove[count] = true;
-//                             count++;
-//                     }
+               //for(int i = 0; i < columnsToRemove.size(); i++)
+//             {
+//                     cout << "Remove Column " << i << " = ";
+//                     if(columnsToRemove[i])
+//                             cout << "true\n";
+//                     else
+//                             cout << "false\n";
 //             }
-               
-               
-               
+               //Creating the new SequenceDB 
+               SequenceDB newDB;
+               for(int i = 0; i < db->size(); i++) {
+                       Sequence curSeq = db->get(i);
+                       string curAligned = curSeq.getAligned();
+                       string curName = curSeq.getName();
+                       string newAligned = "";
+                       for(size_t j = 0; j < curAligned.length(); j++) {
+                               // Columns beyond the mask (a sequence longer than the first one)
+                               // carry no removal flag; keep them rather than read out of bounds.
+                               if(j >= columnsToRemove.size() || !columnsToRemove[j]) 
+                                       newAligned += curAligned[j];
+                       }
                         
+                       Sequence newSeq(curName, newAligned);
+                       newDB.add(newSeq);
+               }
+               
+               // NOTE(review): the output name is hard-coded - presumably a
+               // placeholder until the name is derived from the input file; confirm.
+               ofstream outfile;
+               outfile.open("filtertest.txt");
+               newDB.print(outfile);
+               outfile.close();
                         
                 return 0;
         }
         catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the DeconvoluteCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "Standard Error: " << e.what() << " has occurred in the FilterSeqsCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                 exit(1);
         }
         catch(...) {
-               cout << "An unknown error has occurred in the DeconvoluteCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "An unknown error has occurred in the FilterSeqsCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                 exit(1);
         }
 }
index 40b87583aaa2e472f788b2102f5daea0628c2f9e..aa68dc2c1dffd0fe90f82e8eabff554e0c8c0000 100644 (file)
@@ -23,6 +23,7 @@ class GroupMap;
 class TreeMap;
 class SAbundVector;
 class RAbundVector;
+class SequenceDB;
 
 class GlobalData {
 public:
@@ -39,6 +40,7 @@ public:
        GroupMap* gGroupmap;
        FullMatrix* gMatrix;
        TreeMap* gTreemap;
+       SequenceDB* gSequenceDB;
        string inputFileName, helpRequest, commandName, vertical;
        bool allLines;
        vector<string>  Estimators, Groups; //holds estimators to be used
index 919855b26bba0a3d09c115c7f92dad2f81f6d47f..2ca75679b3a9ca5869e380ccfc4ff4328336f0a4 100644 (file)
 ReadSeqsCommand::ReadSeqsCommand(){
        try {
                globaldata = GlobalData::getInstance();
-               
+               // Reads the file named by globaldata->inputFileName with the reader that
+               // matches the first non-empty file setting, checked in the order fasta,
+               // nexus, clustal, phylip, and publishes the resulting database through
+               // globaldata->gSequenceDB. If none of the four settings is non-empty,
+               // gSequenceDB is left untouched.
+               // NOTE(review): each reader is allocated with new and not released in
+               // this constructor - confirm it is deleted elsewhere.
+               filename = globaldata->inputFileName;
+               if(globaldata->getFastaFile().compare("") != 0) {
+                       readFasta = new ReadFasta(filename);
+                       readFasta->read();
+                       globaldata->gSequenceDB = readFasta->getDB();
+               }
+               else if(globaldata->getNexusFile().compare("") != 0) {
+                       readNexus = new ReadNexus(filename);
+                       readNexus->read();
+                       globaldata->gSequenceDB = readNexus->getDB();
+               }
+               else if(globaldata->getClustalFile().compare("") != 0) {
+                       readClustal = new ReadClustal(filename);
+                       readClustal->read();
+                       globaldata->gSequenceDB = readClustal->getDB();
+               }
+               else if(globaldata->getPhylipFile().compare("") != 0) {
+                       readPhylip = new ReadPhylip(filename);
+                       readPhylip->read();
+                       globaldata->gSequenceDB = readPhylip->getDB();
+               }
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "Standard Error: " << e.what() << " has occurred in the ReadSeqsCommand class Function ReadSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }
        catch(...) {
-               cout << "An unknown error has occurred in the ReadOtuCommand class function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "An unknown error has occurred in the ReadSeqsCommand class function ReadSeqsCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }
 }
@@ -41,7 +61,7 @@ int ReadSeqsCommand::execute(){
                filebuf fb;
                
                //fb.open ("fasta.txt",ios::out);
-//             readFasta->read();
+               //readFasta->read();
 //             SequenceDB* db = readFasta->getDB();
                
                //fb.open("nexus.txt",ios::out);
@@ -59,7 +79,7 @@ int ReadSeqsCommand::execute(){
                
                
                //for(int i = 0; i < db->size(); i++) {
-//                     cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getUnaligned() << "\n\n";
+//                     cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getAligned() << "\n\n";
 //             }
 
                //ostream os(&fb);
index b59363e13cc92f969145434d1ba8c0fa64573778..5b3b01d445dde50d485ca9c22ff2af843a2d79ce 100644 (file)
@@ -114,7 +114,7 @@ int Sequence::getLength(){
 
 //********************************************************************************************************************
 
-void Sequence::printSequence(ostream& out){
+void Sequence::printSequence(ofstream& out){
        string toPrint = unaligned;
        if(aligned.length() > unaligned.length())
                toPrint = aligned;
index 03cbab7ffddce5531e272cf6116069edfa0ebade..dea06bdca0bdcd5844c02c8e94694a3b9b8d28e4 100644 (file)
@@ -33,7 +33,7 @@ public:
        string getPairwise();
        string getUnaligned();
        int getLength();
-       void printSequence(ostream&);
+       void printSequence(ofstream&);
        
 private:
        string name;
index e8aade76284c73749af5799233a1ded3ef90f0a1..1f81ba82fb553bf54828216870121614214d2406 100644 (file)
@@ -72,7 +72,7 @@ int SequenceDB::size() {
 
 /***********************************************************************/
 
-void SequenceDB::print(ostream& out) {
+void SequenceDB::print(ofstream& out) { // prints every stored sequence to out, in index order, via Sequence::printSequence. NOTE(review): the parameter was narrowed from ostream& to ofstream&, so cout or a string stream can no longer be passed - confirm this is intended
        for(int i = 0; i < data.size(); i++)
                data[i].printSequence(out);
 }
index f31fc320c698de446cae90f6eb11e4c83bf96eb9..35636bf492d78b809dbe0b010f2d1ecf1abab050 100644 (file)
@@ -36,7 +36,7 @@ public:
        void changeSize(int);      //resizes data
        void clear();              //clears data - remeber to loop through and delete the sequences inside or you will have a memory leak
        int size();                //returns datas size
-       void print(ostream&);      //loops through data using sequence class print
+       void print(ofstream&);      //loops through data using sequence class print
                
 private:
        vector<Sequence> data;