]> git.donarmstrong.com Git - mothur.git/blobdiff - chimera.cpp
modified mpi code to save ram by writing out every 10 seqs.
[mothur.git] / chimera.cpp
index bf16de4e34b795694d1a894ea1005f32353be110..7eeca96316511531532232d56519e1900cd7a9c6 100644 (file)
 
 //***************************************************************************************************************
 //this is a vertical soft filter
-void Chimera::createFilter(vector<Sequence*> seqs) {
+string Chimera::createFilter(vector<Sequence*> seqs, float t) {
        try {
                filterString = "";
-               int threshold = int (0.5 * seqs.size());
+               int threshold = int (t * seqs.size());
 //cout << "threshhold = " << threshold << endl;
                
                vector<int> gaps;       gaps.resize(seqs[0]->getAligned().length(), 0);
@@ -22,14 +22,16 @@ void Chimera::createFilter(vector<Sequence*> seqs) {
                vector<int> t;          t.resize(seqs[0]->getAligned().length(), 0);
                vector<int> g;          g.resize(seqs[0]->getAligned().length(), 0);
                vector<int> c;          c.resize(seqs[0]->getAligned().length(), 0);
-               
+       
                filterString = (string(seqs[0]->getAligned().length(), '1'));
                
                //for each sequence
                for (int i = 0; i < seqs.size(); i++) {
                
+                       if (m->control_pressed) { return filterString; }
+               
                        string seqAligned = seqs[i]->getAligned();
-                       
+               
                        for (int j = 0; j < seqAligned.length(); j++) {
                                //if this spot is a gap
                                if ((seqAligned[j] == '-') || (seqAligned[j] == '.'))   {       gaps[j]++;      }
@@ -44,37 +46,46 @@ void Chimera::createFilter(vector<Sequence*> seqs) {
                //zero out spot where all sequences have blanks
                int numColRemoved = 0;
                for(int i = 0;i < seqs[0]->getAligned().length(); i++){
+               
+                       if (m->control_pressed) { return filterString; }
+                       
                        if(gaps[i] == seqs.size())      {       filterString[i] = '0';  numColRemoved++;  }
                        
                        else if (((a[i] < threshold) && (t[i] < threshold) && (g[i] < threshold) && (c[i] < threshold))) {      filterString[i] = '0';  numColRemoved++;  }
                        //cout << "a = " << a[i] <<  " t = " << t[i] <<  " g = " << g[i] <<  " c = " << c[i] << endl;
                }
-       
-//cout << "filter = " << filterString << endl; 
 
-               mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  mothurOutEndLine();
+               m->mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  m->mothurOutEndLine();
+               return filterString;
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "createFilter");
+               m->errorOut(e, "Chimera", "createFilter");
                exit(1);
        }
 }
 //***************************************************************************************************************
-void Chimera::runFilter(Sequence* seq) {
+map<int, int> Chimera::runFilter(Sequence* seq) { //applies filterString to seq in place; returns map of filtered position -> original alignment column
        try {
-               
+               map<int, int> maskMap; //key: index in the filtered sequence, value: index in the unfiltered alignment
                string seqAligned = seq->getAligned();
                string newAligned = "";
+               int count = 0; //number of columns kept so far, i.e. next index in newAligned
                        
                for (int j = 0; j < seqAligned.length(); j++) {
                        //keep this column only if the filter marks it with '1'
-                       if (filterString[j] == '1') { newAligned += seqAligned[j]; }
+                       if (filterString[j] == '1') { //NOTE(review): assumes filterString is at least as long as seqAligned - confirm callers guarantee this
+                               newAligned += seqAligned[j]; 
+                               maskMap[count] = j;
+                               count++;
+                       }
                }
                        
                seq->setAligned(newAligned);
+               
+               return maskMap;
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "runFilter");
+               m->errorOut(e, "Chimera", "runFilter");
                exit(1);
        }
 }
@@ -82,17 +93,20 @@ void Chimera::runFilter(Sequence* seq) {
 vector<Sequence*> Chimera::readSeqs(string file) {
        try {
        
-               mothurOut("Reading sequences... "); cout.flush();
+               m->mothurOut("Reading sequences... "); cout.flush();
                ifstream in;
                openInputFile(file, in);
+               
                vector<Sequence*> container;
                int count = 0;
-               int length = 0;
+               length = 0;
                unaligned = false;
                
                //read in seqs and store in vector
                while(!in.eof()){
                        
+                       if (m->control_pressed) { return container; }
+                       
                        Sequence* current = new Sequence(in);  gobble(in);
                        
                        if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
@@ -104,12 +118,12 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                }
                
                in.close();
-               mothurOut("Done."); mothurOutEndLine();
+               m->mothurOut("Done."); m->mothurOutEndLine();
                
                return container;
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "readSeqs");
+               m->errorOut(e, "Chimera", "readSeqs");
                exit(1);
        }
 }
@@ -137,7 +151,7 @@ void Chimera::setMask(string filename) {
                }
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "setMask");
+               m->errorOut(e, "Chimera", "setMask");
                exit(1);
        }
 }
@@ -180,7 +194,7 @@ vector< vector<float> > Chimera::readQuantiles() {
                
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "readQuantiles");
+               m->errorOut(e, "Chimera", "readQuantiles");
                exit(1);
        }
 }
@@ -198,14 +212,14 @@ Sequence* Chimera::getSequence(string name) {
                        }
                }
                
-               if(spot == -1) { mothurOut("Error: Could not find sequence."); mothurOutEndLine(); return NULL; }
+               if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
                
                temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
                
                return temp;
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "getSequence");
+               m->errorOut(e, "Chimera", "getSequence");
                exit(1);
        }
 }