]> git.donarmstrong.com Git - mothur.git/blobdiff - readblast.cpp
working on pam
[mothur.git] / readblast.cpp
index a6d23e95bfddf71afb20a2d4c2b2d8d1377db71d..84fddcf263cfdeb572f72ff3bd658325f31271e0 100644 (file)
@@ -16,12 +16,13 @@ inline bool compareOverlap(seqDist left, seqDist right){
        return (left.dist < right.dist);        
 } 
 /*********************************************************************************************/
-ReadBlast::ReadBlast(string file, float c, float p, int l, bool m, bool h) : blastfile(file), cutoff(c), penalty(p), length(l), minWanted(m), hclusterWanted(h) {
+ReadBlast::ReadBlast(string file, float c, float p, int l, bool ms, bool h) : blastfile(file), cutoff(c), penalty(p), length(l), minWanted(ms), hclusterWanted(h) {
        try {
+               m = MothurOut::getInstance();
                matrix = NULL;
        }
        catch(exception& e) {
-               errorOut(e, "ReadBlast", "ReadBlast");
+               m->errorOut(e, "ReadBlast", "ReadBlast");
                exit(1);
        }
 } 
@@ -29,15 +30,17 @@ ReadBlast::ReadBlast(string file, float c, float p, int l, bool m, bool h) : bla
 //assumptions about the blast file: 
 //1. if duplicate lines occur the first line is always best and is chosen
 //2. blast scores are grouped together, ie. a a .... score, a b .... score, a c ....score...
-void ReadBlast::read(NameAssignment* nameMap) {
+int ReadBlast::read(NameAssignment* nameMap) {
        try {
        
                //if the user has not given a names file read names from blastfile
                if (nameMap->size() == 0) { readNames(nameMap);  }
                int nseqs = nameMap->size();
+               
+               if (m->control_pressed) { return 0; }
 
                ifstream fileHandle;
-               openInputFile(blastfile, fileHandle);
+               m->openInputFile(blastfile, fileHandle);
                
                string firstName, secondName, eScore, currentRow;
                string repeatName = "";
@@ -51,15 +54,23 @@ void ReadBlast::read(NameAssignment* nameMap) {
                
                //create objects needed for read
                if (!hclusterWanted) {
-                       matrix = new SparseMatrix();
+                       matrix = new SparseDistanceMatrix();
+            matrix->resize(nseqs);
                }else{
-                       overlapFile = getRootName(blastfile) + "overlap.dist";
-                       distFile = getRootName(blastfile) + "hclusterDists.dist";
+                       overlapFile = m->getRootName(blastfile) + "overlap.dist";
+                       distFile = m->getRootName(blastfile) + "hclusterDists.dist";
                        
-                       openOutputFile(overlapFile, outOverlap);
-                       openOutputFile(distFile, outDist);
+                       m->openOutputFile(overlapFile, outOverlap);
+                       m->openOutputFile(distFile, outDist);
                }
-       
+               
+               if (m->control_pressed) { 
+                       fileHandle.close();
+                       if (!hclusterWanted) {  delete matrix; }
+                       else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile);  }
+                       return 0;
+               }
+               
                Progress* reading = new Progress("Reading blast:     ", nseqs * nseqs);
                
                //this is used to quickly find if we already have a distance for this combo
@@ -69,7 +80,7 @@ void ReadBlast::read(NameAssignment* nameMap) {
                if (!fileHandle.eof()) {
                        //read in line from file
                        fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
-                       gobble(fileHandle);
+                       m->gobble(fileHandle);
                        
                        currentRow = firstName;
                        lengthThisSeq = numBases;
@@ -80,8 +91,8 @@ void ReadBlast::read(NameAssignment* nameMap) {
                                //convert name to number
                                map<string,int>::iterator itA = nameMap->find(firstName);
                                map<string,int>::iterator itB = nameMap->find(secondName);
-                               if(itA == nameMap->end()){   cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";  exit(1);  }
-                               if(itB == nameMap->end()){   cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
+                               if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
+                               if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                                
                                thisRowsBlastScores[itB->second] = score;
                                
@@ -98,16 +109,24 @@ void ReadBlast::read(NameAssignment* nameMap) {
                                        }
                                }
                        }
-               }else { mothurOut("Error in your blast file, cannot read."); mothurOutEndLine(); exit(1); }
+               }else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); }
 
-                               
+       
                //read file
                while(!fileHandle.eof()){  
+               
+                       if (m->control_pressed) { 
+                               fileHandle.close();
+                               if (!hclusterWanted) {  delete matrix; }
+                               else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile);  }
+                               delete reading;
+                               return 0;
+                       }
                        
                        //read in line from file
                        fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
                        //cout << firstName << '\t' << secondName << '\t' << percentId << '\t' << numBases << '\t' << mismatch << '\t' << gap << '\t' << startQuery << '\t' << endQuery << '\t' << startRef << '\t' << endRef << '\t' << eScore << '\t' << score << endl;       
-                       gobble(fileHandle);
+                       m->gobble(fileHandle);
                        
                        string temp = firstName + secondName; //to check if this file has repeat lines, ie. is this a blast instead of a blscreen file
                        
@@ -125,8 +144,8 @@ void ReadBlast::read(NameAssignment* nameMap) {
                                                //convert name to number
                                                map<string,int>::iterator itA = nameMap->find(firstName);
                                                map<string,int>::iterator itB = nameMap->find(secondName);
-                                               if(itA == nameMap->end()){   cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";  exit(1);  }
-                                               if(itB == nameMap->end()){   cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
+                                               if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
+                                               if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                                                
                                                //save score
                                                thisRowsBlastScores[itB->second] = score;
@@ -151,6 +170,7 @@ void ReadBlast::read(NameAssignment* nameMap) {
                                        map<int, float>::iterator itDist;
                                        for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) {  
                                                distance = 1.0 - (it->second / refScore);
+               
                                                
                                                //do we already have the distance calculated for b->a
                                                map<string,int>::iterator itA = nameMap->find(currentRow);
@@ -158,16 +178,22 @@ void ReadBlast::read(NameAssignment* nameMap) {
                                                
                                                //if we have it then compare
                                                if (itDist != dists[it->first].end()) {
+       
                                                        //if you want the minimum blast score ratio, then pick max distance
                                                        if(minWanted) {  distance = max(itDist->second, distance);  }
                                                        else{   distance = min(itDist->second, distance);  }
-                                                       
+
                                                        //is this distance below cutoff
                                                        if (distance < cutoff) {
                                                                if (!hclusterWanted) {
-                                                                       PCell value(itA->second, it->first, distance);
-                                                                       matrix->addCell(value);
-                                                               }else{
+                                    if (itA->second < it->first) {
+                                        PDistCell value(it->first, distance);
+                                        matrix->addCell(itA->second, value);
+                                    }else {
+                                        PDistCell value(itA->second, distance);
+                                        matrix->addCell(it->first, value);
+                                    }
+                                }else{
                                                                        outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
                                                                }
                                                        }
@@ -190,8 +216,8 @@ void ReadBlast::read(NameAssignment* nameMap) {
                                                //convert name to number
                                                map<string,int>::iterator itA = nameMap->find(firstName);
                                                map<string,int>::iterator itB = nameMap->find(secondName);
-                                               if(itA == nameMap->end()){   cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";  exit(1);  }
-                                               if(itB == nameMap->end()){   cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
+                                               if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
+                                               if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                                                
                                                thisRowsBlastScores[itB->second] = score;
                                                
@@ -232,8 +258,13 @@ void ReadBlast::read(NameAssignment* nameMap) {
                                //is this distance below cutoff
                                if (distance < cutoff) {
                                        if (!hclusterWanted) {
-                                               PCell value(itA->second, it->first, distance);
-                                               matrix->addCell(value);
+                                               if (itA->second < it->first) {
+                            PDistCell value(it->first, distance);
+                            matrix->addCell(itA->second, value);
+                        }else {
+                            PDistCell value(itA->second, distance);
+                            matrix->addCell(it->first, value);
+                        }
                                        }else{
                                                outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
                                        }
@@ -248,6 +279,14 @@ void ReadBlast::read(NameAssignment* nameMap) {
                thisRowsBlastScores.clear();
                dists.clear();
                
+               if (m->control_pressed) { 
+                               fileHandle.close();
+                               if (!hclusterWanted) {  delete matrix; }
+                               else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile);  }
+                               delete reading;
+                               return 0;
+               }
+               
                if (!hclusterWanted) {
                        sort(overlap.begin(), overlap.end(), compareOverlap);
                }else {
@@ -255,45 +294,66 @@ void ReadBlast::read(NameAssignment* nameMap) {
                        outOverlap.close();
                }
                
+               if (m->control_pressed) { 
+                               fileHandle.close();
+                               if (!hclusterWanted) {  delete matrix; }
+                               else {  m->mothurRemove(overlapFile);  m->mothurRemove(distFile);  }
+                               delete reading;
+                               return 0;
+               }
+               
                reading->finish();
                delete reading;
                fileHandle.close();
+               
+               return 0;
        }
        catch(exception& e) {
-               errorOut(e, "ReadBlast", "read");
+               m->errorOut(e, "ReadBlast", "read");
                exit(1);
        }
 } 
 /*********************************************************************************************/
-void ReadBlast::readNames(NameAssignment* nameMap) {
+int ReadBlast::readNames(NameAssignment* nameMap) {
        try {
-               mothurOut("Reading names... "); cout.flush();
+               m->mothurOut("Reading names... "); cout.flush();
                
                string name, hold, prevName;
                int num = 1;
                
                ifstream in;
-               openInputFile(blastfile, in);
+               m->openInputFile(blastfile, in);
+               
+               //ofstream outName;
+               //m->openOutputFile((blastfile + ".tempOutNames"), outName);
                
                //read first line
                in >> prevName;
+       
                for (int i = 0; i < 11; i++) {  in >> hold;  }
-               gobble(in);
-               
+               m->gobble(in);
+                               
                //save name in nameMap
                nameMap->push_back(prevName);
                
                while (!in.eof()) {
+                       if (m->control_pressed) { in.close(); return 0; }
                        
                        //read line
                        in >> name;
+       
                        for (int i = 0; i < 11; i++) {  in >> hold;  }
-                       gobble(in);
+                       m->gobble(in);
                        
                        //is this a new name?
                        if (name != prevName) {
                                prevName = name;
-                               nameMap->push_back(name);
+                
+                if (nameMap->get(name) != -1) { m->mothurOut("[ERROR]: trying to exact names from blast file, and I found dups.  Are you sequence names unique? quitting.\n"); m->control_pressed = true; }
+                else {
+                    nameMap->push_back(name);
+                }
+                //outName << name << '\t' << name << endl;
                                num++;
                        }
                }
@@ -301,17 +361,21 @@ void ReadBlast::readNames(NameAssignment* nameMap) {
                in.close();
                
                //write out names file
-               //string outNames = getRootName(blastfile) + "names";
+               //string outNames = m->getRootName(blastfile) + "names";
                //ofstream out;
-               //openOutputFile(outNames, out);
+               //m->openOutputFile(outNames, out);
                //nameMap->print(out);
                //out.close();
                
-               mothurOut(toString(num) + " names read."); mothurOutEndLine();
+               if (m->control_pressed) { return 0; }
+               
+               m->mothurOut(toString(num) + " names read."); m->mothurOutEndLine();
+               
+               return 0;
                
        }
        catch(exception& e) {
-               errorOut(e, "ReadBlast", "readNames");
+               m->errorOut(e, "ReadBlast", "readNames");
                exit(1);
        }
 }