git.donarmstrong.com Git - mothur.git/blobdiff - readblast.cpp
working on pam
[mothur.git] / readblast.cpp
index c3dfd4884f0d01d6c9dbb98c17aa9a8341a44c5d..84fddcf263cfdeb572f72ff3bd658325f31271e0 100644 (file)
@@ -40,7 +40,7 @@ int ReadBlast::read(NameAssignment* nameMap) {
                if (m->control_pressed) { return 0; }
 
                ifstream fileHandle;
-               openInputFile(blastfile, fileHandle);
+               m->openInputFile(blastfile, fileHandle);
                
                string firstName, secondName, eScore, currentRow;
                string repeatName = "";
@@ -54,19 +54,20 @@ int ReadBlast::read(NameAssignment* nameMap) {
                
                //create objects needed for read
                if (!hclusterWanted) {
-                       matrix = new SparseMatrix();
+                       matrix = new SparseDistanceMatrix();
+            matrix->resize(nseqs);
                }else{
-                       overlapFile = getRootName(blastfile) + "overlap.dist";
-                       distFile = getRootName(blastfile) + "hclusterDists.dist";
+                       overlapFile = m->getRootName(blastfile) + "overlap.dist";
+                       distFile = m->getRootName(blastfile) + "hclusterDists.dist";
                        
-                       openOutputFile(overlapFile, outOverlap);
-                       openOutputFile(distFile, outDist);
+                       m->openOutputFile(overlapFile, outOverlap);
+                       m->openOutputFile(distFile, outDist);
                }
                
                if (m->control_pressed) { 
                        fileHandle.close();
                        if (!hclusterWanted) {  delete matrix; }
-                       else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str());  }
+                       else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile);  }
                        return 0;
                }
                
@@ -79,7 +80,7 @@ int ReadBlast::read(NameAssignment* nameMap) {
                if (!fileHandle.eof()) {
                        //read in line from file
                        fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
-                       gobble(fileHandle);
+                       m->gobble(fileHandle);
                        
                        currentRow = firstName;
                        lengthThisSeq = numBases;
@@ -90,8 +91,8 @@ int ReadBlast::read(NameAssignment* nameMap) {
                                //convert name to number
                                map<string,int>::iterator itA = nameMap->find(firstName);
                                map<string,int>::iterator itB = nameMap->find(secondName);
-                               if(itA == nameMap->end()){   cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";  exit(1);  }
-                               if(itB == nameMap->end()){   cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
+                               if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
+                               if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                                
                                thisRowsBlastScores[itB->second] = score;
                                
@@ -110,14 +111,14 @@ int ReadBlast::read(NameAssignment* nameMap) {
                        }
                }else { m->mothurOut("Error in your blast file, cannot read."); m->mothurOutEndLine(); exit(1); }
 
-                               
+       
                //read file
                while(!fileHandle.eof()){  
                
                        if (m->control_pressed) { 
                                fileHandle.close();
                                if (!hclusterWanted) {  delete matrix; }
-                               else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str());  }
+                               else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile);  }
                                delete reading;
                                return 0;
                        }
@@ -125,7 +126,7 @@ int ReadBlast::read(NameAssignment* nameMap) {
                        //read in line from file
                        fileHandle >> firstName >> secondName >> percentId >> numBases >> mismatch >> gap >> startQuery >> endQuery >> startRef >> endRef >> eScore >> score;
                        //cout << firstName << '\t' << secondName << '\t' << percentId << '\t' << numBases << '\t' << mismatch << '\t' << gap << '\t' << startQuery << '\t' << endQuery << '\t' << startRef << '\t' << endRef << '\t' << eScore << '\t' << score << endl;       
-                       gobble(fileHandle);
+                       m->gobble(fileHandle);
                        
                        string temp = firstName + secondName; //to check if this file has repeat lines, ie. is this a blast instead of a blscreen file
                        
@@ -143,8 +144,8 @@ int ReadBlast::read(NameAssignment* nameMap) {
                                                //convert name to number
                                                map<string,int>::iterator itA = nameMap->find(firstName);
                                                map<string,int>::iterator itB = nameMap->find(secondName);
-                                               if(itA == nameMap->end()){   cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";  exit(1);  }
-                                               if(itB == nameMap->end()){   cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
+                                               if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
+                                               if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                                                
                                                //save score
                                                thisRowsBlastScores[itB->second] = score;
@@ -185,9 +186,14 @@ int ReadBlast::read(NameAssignment* nameMap) {
                                                        //is this distance below cutoff
                                                        if (distance < cutoff) {
                                                                if (!hclusterWanted) {
-                                                                       PCell value(itA->second, it->first, distance);
-                                                                       matrix->addCell(value);
-                                                               }else{
+                                    if (itA->second < it->first) {
+                                        PDistCell value(it->first, distance);
+                                        matrix->addCell(itA->second, value);
+                                    }else {
+                                        PDistCell value(itA->second, distance);
+                                        matrix->addCell(it->first, value);
+                                    }
+                                }else{
                                                                        outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
                                                                }
                                                        }
@@ -210,8 +216,8 @@ int ReadBlast::read(NameAssignment* nameMap) {
                                                //convert name to number
                                                map<string,int>::iterator itA = nameMap->find(firstName);
                                                map<string,int>::iterator itB = nameMap->find(secondName);
-                                               if(itA == nameMap->end()){   cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n";  exit(1);  }
-                                               if(itB == nameMap->end()){   cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1);  }
+                                               if(itA == nameMap->end()){  m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1);  }
+                                               if(itB == nameMap->end()){  m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1);  }
                                                
                                                thisRowsBlastScores[itB->second] = score;
                                                
@@ -252,8 +258,13 @@ int ReadBlast::read(NameAssignment* nameMap) {
                                //is this distance below cutoff
                                if (distance < cutoff) {
                                        if (!hclusterWanted) {
-                                               PCell value(itA->second, it->first, distance);
-                                               matrix->addCell(value);
+                                               if (itA->second < it->first) {
+                            PDistCell value(it->first, distance);
+                            matrix->addCell(itA->second, value);
+                        }else {
+                            PDistCell value(itA->second, distance);
+                            matrix->addCell(it->first, value);
+                        }
                                        }else{
                                                outDist << itA->first << '\t' << nameMap->get(it->first) << '\t' << distance << endl;
                                        }
@@ -271,7 +282,7 @@ int ReadBlast::read(NameAssignment* nameMap) {
                if (m->control_pressed) { 
                                fileHandle.close();
                                if (!hclusterWanted) {  delete matrix; }
-                               else { outOverlap.close(); remove(overlapFile.c_str()); outDist.close(); remove(distFile.c_str());  }
+                               else { outOverlap.close(); m->mothurRemove(overlapFile); outDist.close(); m->mothurRemove(distFile);  }
                                delete reading;
                                return 0;
                }
@@ -286,7 +297,7 @@ int ReadBlast::read(NameAssignment* nameMap) {
                if (m->control_pressed) { 
                                fileHandle.close();
                                if (!hclusterWanted) {  delete matrix; }
-                               else {  remove(overlapFile.c_str());  remove(distFile.c_str());  }
+                               else {  m->mothurRemove(overlapFile);  m->mothurRemove(distFile);  }
                                delete reading;
                                return 0;
                }
@@ -311,16 +322,16 @@ int ReadBlast::readNames(NameAssignment* nameMap) {
                int num = 1;
                
                ifstream in;
-               openInputFile(blastfile, in);
+               m->openInputFile(blastfile, in);
                
                //ofstream outName;
-               //openOutputFile((blastfile + ".tempOutNames"), outName);
+               //m->openOutputFile((blastfile + ".tempOutNames"), outName);
                
                //read first line
                in >> prevName;
        
                for (int i = 0; i < 11; i++) {  in >> hold;  }
-               gobble(in);
+               m->gobble(in);
                                
                //save name in nameMap
                nameMap->push_back(prevName);
@@ -332,12 +343,17 @@ int ReadBlast::readNames(NameAssignment* nameMap) {
                        in >> name;
        
                        for (int i = 0; i < 11; i++) {  in >> hold;  }
-                       gobble(in);
+                       m->gobble(in);
                        
                        //is this a new name?
                        if (name != prevName) {
                                prevName = name;
-                               nameMap->push_back(name);
+                
+                if (nameMap->get(name) != -1) { m->mothurOut("[ERROR]: trying to exact names from blast file, and I found dups.  Are you sequence names unique? quitting.\n"); m->control_pressed = true; }
+                else {
+                    nameMap->push_back(name);
+                }
+                //outName << name << '\t' << name << endl;
                                num++;
                        }
                }
@@ -345,9 +361,9 @@ int ReadBlast::readNames(NameAssignment* nameMap) {
                in.close();
                
                //write out names file
-               //string outNames = getRootName(blastfile) + "names";
+               //string outNames = m->getRootName(blastfile) + "names";
                //ofstream out;
-               //openOutputFile(outNames, out);
+               //m->openOutputFile(outNames, out);
                //nameMap->print(out);
                //out.close();