]> git.donarmstrong.com Git - mothur.git/blobdiff - chimera.cpp
modified mpi code to save ram by writing out every 10 seqs.
[mothur.git] / chimera.cpp
index 7e997b9824439e4e42f495f95a2d101b58a89600..7eeca96316511531532232d56519e1900cd7a9c6 100644 (file)
 #include "chimera.h"
 
 //***************************************************************************************************************
-//this is a vertical filter
-void Chimera::createFilter(vector<Sequence*> seqs) {
+//this is a vertical soft filter
+string Chimera::createFilter(vector<Sequence*> seqs, float t) {
        try {
-               
-               int threshold = int (0.5 * seqs.size());
+               filterString = "";
+               int threshold = int (t * seqs.size());
 //cout << "threshhold = " << threshold << endl;
                
                vector<int> gaps;       gaps.resize(seqs[0]->getAligned().length(), 0);
@@ -22,14 +22,16 @@ void Chimera::createFilter(vector<Sequence*> seqs) {
                vector<int> t;          t.resize(seqs[0]->getAligned().length(), 0);
                vector<int> g;          g.resize(seqs[0]->getAligned().length(), 0);
                vector<int> c;          c.resize(seqs[0]->getAligned().length(), 0);
-               
+       
                filterString = (string(seqs[0]->getAligned().length(), '1'));
                
                //for each sequence
                for (int i = 0; i < seqs.size(); i++) {
                
+                       if (m->control_pressed) { return filterString; }
+               
                        string seqAligned = seqs[i]->getAligned();
-                       
+               
                        for (int j = 0; j < seqAligned.length(); j++) {
                                //if this spot is a gap
                                if ((seqAligned[j] == '-') || (seqAligned[j] == '.'))   {       gaps[j]++;      }
@@ -40,68 +42,88 @@ void Chimera::createFilter(vector<Sequence*> seqs) {
                        }
                }
                
+               //zero out spot where all sequences have blanks
                //zero out spot where all sequences have blanks
                int numColRemoved = 0;
                for(int i = 0;i < seqs[0]->getAligned().length(); i++){
+               
+                       if (m->control_pressed) { return filterString; }
+                       
                        if(gaps[i] == seqs.size())      {       filterString[i] = '0';  numColRemoved++;  }
                        
                        else if (((a[i] < threshold) && (t[i] < threshold) && (g[i] < threshold) && (c[i] < threshold))) {      filterString[i] = '0';  numColRemoved++;  }
                        //cout << "a = " << a[i] <<  " t = " << t[i] <<  " g = " << g[i] <<  " c = " << c[i] << endl;
                }
-//cout << "filter = " << filterString << endl; 
 
-               mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  mothurOutEndLine();
+               m->mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  m->mothurOutEndLine();
+               return filterString;
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "createFilter");
+               m->errorOut(e, "Chimera", "createFilter");
                exit(1);
        }
 }
 //***************************************************************************************************************
-void Chimera::runFilter(vector<Sequence*> seqs) {
+map<int, int> Chimera::runFilter(Sequence* seq) {
        try {
-               
-               //for each sequence
-               for (int i = 0; i < seqs.size(); i++) {
-               
-                       string seqAligned = seqs[i]->getAligned();
-                       string newAligned = "";
+               map<int, int> maskMap;
+               string seqAligned = seq->getAligned();
+               string newAligned = "";
+               int count = 0;
                        
-                       for (int j = 0; j < seqAligned.length(); j++) {
-                               //if this spot is a gap
-                               if (filterString[j] == '1') { newAligned += seqAligned[j]; }
+               for (int j = 0; j < seqAligned.length(); j++) {
+                       //if this spot is a gap
+                       if (filterString[j] == '1') { 
+                               newAligned += seqAligned[j]; 
+                               maskMap[count] = j;
+                               count++;
                        }
-                       
-                       seqs[i]->setAligned(newAligned);
                }
+                       
+               seq->setAligned(newAligned);
                
+               return maskMap;
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "runFilter");
+               m->errorOut(e, "Chimera", "runFilter");
                exit(1);
        }
 }
-
 //***************************************************************************************************************
 vector<Sequence*> Chimera::readSeqs(string file) {
        try {
+       
+               m->mothurOut("Reading sequences... "); cout.flush();
                ifstream in;
                openInputFile(file, in);
+               
                vector<Sequence*> container;
+               int count = 0;
+               length = 0;
+               unaligned = false;
                
                //read in seqs and store in vector
                while(!in.eof()){
                        
-                       Sequence* current = new Sequence(in);
-                       container.push_back(current);
-                       gobble(in);
+                       if (m->control_pressed) { return container; }
+                       
+                       Sequence* current = new Sequence(in);  gobble(in);
+                       
+                       if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
+                       else if (length != current->getAligned().length()) { //seqs are unaligned
+                               unaligned = true;
+                       }
+                       
+                       if (current->getName() != "") {  container.push_back(current);  }
                }
                
                in.close();
+               m->mothurOut("Done."); m->mothurOutEndLine();
+               
                return container;
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "readSeqs");
+               m->errorOut(e, "Chimera", "readSeqs");
                exit(1);
        }
 }
@@ -129,7 +151,7 @@ void Chimera::setMask(string filename) {
                }
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "setMask");
+               m->errorOut(e, "Chimera", "setMask");
                exit(1);
        }
 }
@@ -142,7 +164,7 @@ vector< vector<float> > Chimera::readQuantiles() {
                openInputFile(quanfile, in);
                
                vector< vector<float> > quan;
-               vector <float> temp;
+               vector <float> temp; temp.resize(6, 0);
                
                //to fill 0
                quan.push_back(temp); 
@@ -172,7 +194,32 @@ vector< vector<float> > Chimera::readQuantiles() {
                
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "readQuantiles");
+               m->errorOut(e, "Chimera", "readQuantiles");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+Sequence* Chimera::getSequence(string name) {
+       try{
+               Sequence* temp;
+               
+               //look through templateSeqs til you find it
+               int spot = -1;
+               for (int i = 0; i < templateSeqs.size(); i++) {
+                       if (name == templateSeqs[i]->getName()) {  
+                               spot = i;
+                               break;
+                       }
+               }
+               
+               if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
+               
+               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+               
+               return temp;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Chimera", "getSequence");
                exit(1);
        }
 }