]> git.donarmstrong.com Git - mothur.git/blobdiff - chimera.cpp
modified mpi code to save ram by writing out every 10 seqs.
[mothur.git] / chimera.cpp
index 48e5763f2dbffe8e19a11e22cb8b897b34317865..7eeca96316511531532232d56519e1900cd7a9c6 100644 (file)
 
 #include "chimera.h"
 
+//***************************************************************************************************************
+//this is a vertical soft filter
+//Builds filterString, one character per alignment column of seqs[0]: '1' keeps the column, '0' drops it.
+//A column is dropped when every sequence has a gap ('-' or '.') there, or when none of A, T, G, C
+//is seen at least int(t * seqs.size()) times in that column.
+//  seqs - aligned sequences to scan; the length of seqs[0] sets the filter length
+//  t    - fraction of the sequences that must share a base for the column to be kept
+//Returns the filter, which is also stored in the filterString member. An empty seqs gives an empty filter.
+//If m->control_pressed is set, the filter built so far is returned immediately.
+string Chimera::createFilter(vector<Sequence*> seqs, float t) {
+       try {
+               filterString = "";
+               
+               //nothing to scan - also keeps us from dereferencing seqs[0] below
+               if (seqs.empty()) { return filterString; }
+               
+               const int numSeqs = seqs.size();
+               const int alignLength = seqs[0]->getAligned().length();
+               const int threshold = int (t * numSeqs);
+               
+               //per column tallies
+               vector<int> gapCount;   gapCount.resize(alignLength, 0);
+               vector<int> aCount;     aCount.resize(alignLength, 0);
+               vector<int> tCount;     tCount.resize(alignLength, 0);
+               vector<int> gCount;     gCount.resize(alignLength, 0);
+               vector<int> cCount;     cCount.resize(alignLength, 0);
+       
+               filterString = (string(alignLength, '1'));
+               
+               //for each sequence
+               for (int i = 0; i < numSeqs; i++) {
+               
+                       if (m->control_pressed) { return filterString; }
+               
+                       string seqAligned = seqs[i]->getAligned();
+                       
+                       //a sequence longer than the first one must not write past the tallies
+                       int numCols = seqAligned.length();
+                       if (numCols > alignLength) { numCols = alignLength; }
+               
+                       for (int j = 0; j < numCols; j++) {
+                               const char base = toupper(seqAligned[j]);
+                               
+                               if ((base == '-') || (base == '.'))     {       gapCount[j]++;  }
+                               else if (base == 'A')                   {       aCount[j]++;    }
+                               else if (base == 'T')                   {       tCount[j]++;    }
+                               else if (base == 'G')                   {       gCount[j]++;    }
+                               else if (base == 'C')                   {       cCount[j]++;    }
+                       }
+               }
+               
+               //zero out spots where all sequences have blanks, or where no base reaches the threshold
+               int numColRemoved = 0;
+               for(int i = 0; i < alignLength; i++){
+               
+                       if (m->control_pressed) { return filterString; }
+                       
+                       const bool allGaps = (gapCount[i] == numSeqs);
+                       const bool noCommonBase = ((aCount[i] < threshold) && (tCount[i] < threshold) && (gCount[i] < threshold) && (cCount[i] < threshold));
+                       
+                       if (allGaps || noCommonBase) {  filterString[i] = '0';  numColRemoved++;  }
+               }
+
+               m->mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  m->mothurOutEndLine();
+               return filterString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Chimera", "createFilter");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+//Applies filterString to seq: only the columns marked '1' are kept, and seq's aligned string is
+//replaced with the result.
+//Returns a map from each position in the filtered sequence to the column it came from in the
+//unfiltered sequence.
+//Columns beyond the end of filterString are dropped, since the filter says nothing about them.
+map<int, int> Chimera::runFilter(Sequence* seq) {
+       try {
+               map<int, int> maskMap;
+               string seqAligned = seq->getAligned();
+               string newAligned = "";
+               int count = 0;
+               
+               //never index filterString past its end (sequence longer than the filter, or no filter built)
+               int numCols = seqAligned.length();
+               const int filterLength = filterString.length();
+               if (numCols > filterLength) { numCols = filterLength; }
+                       
+               for (int j = 0; j < numCols; j++) {
+                       //if the filter keeps this column
+                       if (filterString[j] == '1') { 
+                               newAligned += seqAligned[j]; 
+                               maskMap[count] = j;
+                               count++;
+                       }
+               }
+                       
+               seq->setAligned(newAligned);
+               
+               return maskMap;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Chimera", "runFilter");
+               exit(1);
+       }
+}
 //***************************************************************************************************************
 vector<Sequence*> Chimera::readSeqs(string file) {
        try {
+       
+               m->mothurOut("Reading sequences... "); cout.flush();
                ifstream in;
                openInputFile(file, in);
+               
                vector<Sequence*> container;
+               int count = 0;
+               length = 0;
+               unaligned = false;
                
                //read in seqs and store in vector
                while(!in.eof()){
                        
-                       Sequence* current = new Sequence(in);
-                       container.push_back(current);
-                       gobble(in);
+                       if (m->control_pressed) { return container; }
+                       
+                       Sequence* current = new Sequence(in);  gobble(in);
+                       
+                       if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
+                       else if (length != current->getAligned().length()) { //seqs are unaligned
+                               unaligned = true;
+                       }
+                       
+                       if (current->getName() != "") {  container.push_back(current);  }
                }
                
                in.close();
+               m->mothurOut("Done."); m->mothurOutEndLine();
+               
                return container;
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "readSeqs");
+               m->errorOut(e, "Chimera", "readSeqs");
                exit(1);
        }
 }
@@ -56,8 +151,80 @@ void Chimera::setMask(string filename) {
                }
        }
        catch(exception& e) {
-               errorOut(e, "Chimera", "setMask");
+               m->errorOut(e, "Chimera", "setMask");
                exit(1);
        }
 }
 //***************************************************************************************************************
+
+//Reads quanfile, where each record is one integer followed by six floats.
+//Returns one row of six floats per record, in file order, after a leading zero-filled row,
+//so the first record lands at index 1. The integer column is read but not stored.
+//Reading stops at the first record that cannot be read in full.
+vector< vector<float> > Chimera::readQuantiles() {
+       try {
+       
+               ifstream in;
+               openInputFile(quanfile, in);
+               
+               vector< vector<float> > quan;
+               vector <float> temp; temp.resize(6, 0);
+               
+               //to fill 0
+               quan.push_back(temp); 
+       
+               int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
+               
+               //test the extraction itself rather than eof(): a failed read must neither store
+               //unset values nor spin forever on input that never reaches end of file
+               while(in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine){
+                       
+                       temp[0] = ten; 
+                       temp[1] = twentyfive;
+                       temp[2] = fifty;
+                       temp[3] = seventyfive;
+                       temp[4] = ninetyfive;
+                       temp[5] = ninetynine;
+                       
+                       quan.push_back(temp);  
+       
+                       gobble(in);
+               }
+               
+               in.close();
+               return quan;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Chimera", "readQuantiles");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+//Looks up name among templateSeqs and returns a newly allocated Sequence built from the first
+//match's name and aligned string. Logs an error and returns NULL when no template has that name.
+Sequence* Chimera::getSequence(string name) {
+       try{
+               //walk the templates until the name matches or we run out
+               int index = 0;
+               while ((index < templateSeqs.size()) && (templateSeqs[index]->getName() != name)) {
+                       index++;
+               }
+               
+               if (index == templateSeqs.size()) {
+                       m->mothurOut("Error: Could not find sequence.");
+                       m->mothurOutEndLine();
+                       return NULL;
+               }
+               
+               Sequence* match = templateSeqs[index];
+               return new Sequence(match->getName(), match->getAligned());
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Chimera", "getSequence");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+
+
+