]> git.donarmstrong.com Git - mothur.git/blobdiff - chimera.cpp
added MPI code, broke up chimera.seqs into 5 separated commands, added parse.sff...
[mothur.git] / chimera.cpp
index 7eeca96316511531532232d56519e1900cd7a9c6..692a4fee837376232981cfaf91934b96b5efa75d 100644 (file)
@@ -42,7 +42,6 @@ string Chimera::createFilter(vector<Sequence*> seqs, float t) {
                        }
                }
                
-               //zero out spot where all sequences have blanks
                //zero out spot where all sequences have blanks
                int numColRemoved = 0;
                for(int i = 0;i < seqs[0]->getAligned().length(); i++){
@@ -55,7 +54,8 @@ string Chimera::createFilter(vector<Sequence*> seqs, float t) {
                        //cout << "a = " << a[i] <<  " t = " << t[i] <<  " g = " << g[i] <<  " c = " << c[i] << endl;
                }
 
-               m->mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  m->mothurOutEndLine();
+               if (threshold != 0.0) {  m->mothurOut("Filter removed " + toString(numColRemoved) + " columns.");  m->mothurOutEndLine();  }
+               
                return filterString;
        }
        catch(exception& e) {
@@ -93,14 +93,68 @@ map<int, int> Chimera::runFilter(Sequence* seq) {
 vector<Sequence*> Chimera::readSeqs(string file) {
        try {
        
-               m->mothurOut("Reading sequences... "); cout.flush();
-               ifstream in;
-               openInputFile(file, in);
-               
                vector<Sequence*> container;
                int count = 0;
                length = 0;
                unaligned = false;
+
+               m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
+               
+               #ifdef USE_MPI  
+                       int pid;
+                       vector<long> positions;
+                       int numSeqs;
+               
+                       MPI_Status status; 
+                       MPI_File inMPI;
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+       
+                       char inFileName[file.length()];
+                       strcpy(inFileName, file.c_str());
+       
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+       
+                       if (pid == 0) {
+                               positions = setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
+
+                               //send file positions to all processes
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     
+                       }else{
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
+                               positions.resize(numSeqs+1);
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                       }
+                       
+                       //read file 
+                       for(int i=0;i<numSeqs;i++){
+                       
+                               if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
+       
+                               //read next sequence
+                               int seqlength = positions[i+1] - positions[i];
+                               char buf4[seqlength];
+                               MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
+                               
+                               string tempBuf = buf4;
+                               if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
+                               
+                               istringstream iss (tempBuf,istringstream::in);
+               
+                               Sequence* current = new Sequence(iss);   
+                               if (current->getName() != "") {
+                                       if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
+                                       else if (length != current->getAligned().length()) {    unaligned = true;       }
+                       
+                                       container.push_back(current);  
+                               }
+                       }
+                       
+                       MPI_File_close(&inMPI);
+       #else
+
+               ifstream in;
+               openInputFile(file, in);
                
                //read in seqs and store in vector
                while(!in.eof()){
@@ -110,14 +164,13 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                        Sequence* current = new Sequence(in);  gobble(in);
                        
                        if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
-                       else if (length != current->getAligned().length()) { //seqs are unaligned
-                               unaligned = true;
-                       }
-                       
+                       else if (length != current->getAligned().length()) {    unaligned = true;       }
+                                               
                        if (current->getName() != "") {  container.push_back(current);  }
                }
-               
                in.close();
+       #endif
+       
                m->mothurOut("Done."); m->mothurOutEndLine();
                
                return container;
@@ -137,64 +190,53 @@ void Chimera::setMask(string filename) {
                }else if (filename == "") {  //do nothing 
                        seqMask = "";
                }else{
+               
+       #ifdef USE_MPI  
+                       MPI_File inMPI;
+                       MPI_Offset size;
+                       MPI_Status status;
+                       
+                       char inFileName[filename.length()];
+                       strcpy(inFileName, filename.c_str());
+       
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                       MPI_File_get_size(inMPI, &size);
+                       
+                       char buffer[size];
+                       MPI_File_read(inMPI, buffer, size, MPI_CHAR, &status);
+                       
+                       string tempBuf = buffer;
+                       if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
+                       istringstream iss (tempBuf,istringstream::in);
+                       
+                       if (!iss.eof()) {
+                               Sequence temp(iss);
+                               seqMask = temp.getAligned();
+                       }else {
+                               m->mothurOut("Problem with mask."); m->mothurOutEndLine(); 
+                               seqMask = "";
+                       }
+                       
+                       MPI_File_close(&inMPI);
+       #else
+       
                        ifstream infile;
                        openInputFile(filename, infile);
                        
-                       while (!infile.eof()) {
+                       if (!infile.eof()) {
                                Sequence temp(infile);
                                seqMask = temp.getAligned();
-                               
-                               gobble(infile);
+                       }else {
+                               m->mothurOut("Problem with mask."); m->mothurOutEndLine(); 
+                               seqMask = "";
                        }
-                       
                        infile.close();
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Chimera", "setMask");
-               exit(1);
-       }
-}
-//***************************************************************************************************************
-
-vector< vector<float> > Chimera::readQuantiles() {
-       try {
-       
-               ifstream in;
-               openInputFile(quanfile, in);
-               
-               vector< vector<float> > quan;
-               vector <float> temp; temp.resize(6, 0);
-               
-               //to fill 0
-               quan.push_back(temp); 
-       
-               int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
-               
-               while(!in.eof()){
-                       
-                       in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
-                       
-                       temp.clear();
-                       
-                       temp.push_back(ten); 
-                       temp.push_back(twentyfive);
-                       temp.push_back(fifty);
-                       temp.push_back(seventyfive);
-                       temp.push_back(ninetyfive);
-                       temp.push_back(ninetynine);
-                       
-                       quan.push_back(temp);  
+       #endif
        
-                       gobble(in);
                }
-               
-               in.close();
-               return quan;
-               
        }
        catch(exception& e) {
-               m->errorOut(e, "Chimera", "readQuantiles");
+               m->errorOut(e, "Chimera", "setMask");
                exit(1);
        }
 }