]> git.donarmstrong.com Git - mothur.git/commitdiff
chimera.slayer debugging
author pschloss <pschloss>
Fri, 6 May 2011 00:41:14 +0000 (00:41 +0000)
committer pschloss <pschloss>
Fri, 6 May 2011 00:41:14 +0000 (00:41 +0000)
blastdb.cpp
chimeraslayer.cpp
maligner.cpp

index 6d7ab18b870567cff8a39dc3b848a3de9b3c9c4f..438d90c30961dfbabe5436483f3d33a23fc537ae 100644 (file)
@@ -128,6 +128,7 @@ vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) {
                //      wordsize used in megablast.  I'm sure we're sacrificing accuracy for speed, but any other way would take way too
                //      long.  With this setting, it seems comparable in speed to the suffix tree approach.
 //7000004128189528left 0       100             66      0       0       1       66      61      126     1e-31    131    
+               
                string blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
                blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
                system(blastCommand.c_str());
index 1d22bdfe36f74194d2609c01b956cf0ab8ba67c8..a182b846cc4f84288e029188f53c25dcf4ea93cc 100644 (file)
@@ -545,6 +545,8 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef
                        
                        bool rightChimeric = false;
                        bool leftChimeric = false;
+
+                       cout << endl;
                        
                        if (chimeraFlag == "yes") {     
                                //which piece is chimeric or are both
@@ -552,7 +554,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef
                                if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
                                
                                if (rightChimeric || leftChimeric) {
-//                                     cout << querySeq->getName() <<  "\tyes" << endl;
+                                       cout << querySeq->getName() <<  "\tyes" << endl;
                                        outAccString += querySeq->getName() + "\n";
                                        results = true;
                                        
@@ -1049,46 +1051,66 @@ vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db
                
                vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
                vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
-               //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
-               vector<int> smaller;
-               vector<int> larger;
+                               
                
-               if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
-               else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
+               //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
+//             vector<int> smaller;
+//             vector<int> larger;
+//             
+//             if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
+//             else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
                
                //merge results         
                map<int, int> seen;
                map<int, int>::iterator it;
                vector<int> mergedResults;
-               for (int i = 0; i < smaller.size(); i++) {
+               
+               int index = 0;
+//             for (int i = 0; i < smaller.size(); i++) {
+               while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
+                       
                        if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
        
                        //add left if you havent already
-                       it = seen.find(smaller[i]);
+                       it = seen.find(tempIndexesLeft[index]);
                        if (it == seen.end()) {  
-                               mergedResults.push_back(smaller[i]);
-                               seen[smaller[i]] = smaller[i];
+                               mergedResults.push_back(tempIndexesLeft[index]);
+                               seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
                        }
                        
                        //add right if you havent already
-                       it = seen.find(larger[i]);
+                       it = seen.find(tempIndexesRight[index]);
                        if (it == seen.end()) {  
-                               mergedResults.push_back(larger[i]);
-                               seen[larger[i]] = larger[i];
+                               mergedResults.push_back(tempIndexesRight[index]);
+                               seen[tempIndexesRight[index]] = tempIndexesRight[index];
                        }
+                       index++;
                }
+
                
-               for (int i = smaller.size(); i < larger.size(); i++) {
+               for (int i = index; i < tempIndexesLeft.size(); i++) {
                        if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
                        
                        //add left if you haven't already
-                       it = seen.find(larger[i]);
+                       it = seen.find(tempIndexesLeft[i]);
                        if (it == seen.end()) {  
-                               mergedResults.push_back(larger[i]);
-                               seen[larger[i]] = larger[i];
+                               mergedResults.push_back(tempIndexesLeft[i]);
+                               seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
                        }
                }
 
+               for (int i = index; i < tempIndexesRight.size(); i++) {
+                       if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
+                       
+                       //add right if you havent already
+                       it = seen.find(tempIndexesRight[i]);
+                       if (it == seen.end()) {  
+                               mergedResults.push_back(tempIndexesRight[i]);
+                               seen[tempIndexesRight[i]] = tempIndexesRight[i];
+                       }
+               }
+               
+               
                for (int i = 0; i < mergedResults.size(); i++) {
                        //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
                        if (db[mergedResults[i]]->getName() != q->getName()) { 
@@ -1098,11 +1120,7 @@ vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db
                        }
                }
                
-               
-//             for(int i=0;i<refResults.size();i++){
-//                     cout << refResults[i]->getName() << endl;
-//             }
-                       
+
                delete queryRight;
                delete queryLeft;
                
index d7731facce712bb98fd239793cda78dfa7cc69cc..1205cef5e36b8bb4381f5ae8bb516c30e934318d 100644 (file)
@@ -73,6 +73,11 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
                if (query->getAligned() == "") { return "no"; }
 
                vector<Sequence*> temp = refSeqs;
+               
+//             for(int i=0;i<refSeqs.size();i++){
+//                     cout << refSeqs[i]->getName() << endl;
+//             }
+               
                temp.push_back(query);
                        
                verticalFilter(temp);
@@ -90,7 +95,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
                if (m->control_pressed) { return chimera;  }
                
                vector<trace_struct> trace = mapTraceRegionsToAlignment(path, refSeqs);
-               
+                       
                if (trace.size() > 1) {         chimera = "yes";        }
                else { chimera = "no";  return chimera; }
                
@@ -98,13 +103,16 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
                int traceEnd = path[path.size()-1].col; 
                string queryInRange = query->getAligned();
                queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1));
-               
+//             cout << queryInRange << endl;
                string chimeraSeq = constructChimericSeq(trace, refSeqs);
+//             cout << chimeraSeq << endl;
+               
 //             cout << queryInRange.length() << endl;
 //             cout << chimeraSeq.length() << endl;
                
                percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq);
                
+//             cout << percentIdenticalQueryChimera << endl;
        /*      
                vector<trace_struct> trace = extractHighestPath(matrix);
                                
@@ -129,6 +137,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
                        int regionEnd = trace[i].oldCol;
                        int seqIndex = trace[i].row;
                        
+//                     cout << regionStart << '\t' << regionEnd << '\t' << seqIndex << endl;
                        results temp;
                        
                        temp.parent = refSeqs[seqIndex]->getName();
@@ -316,6 +325,8 @@ void Maligner::fillScoreMatrix(vector<vector<score_struct> >& ms, vector<Sequenc
                int numCols = query->getAligned().length();
                int numRows = seqs.size();
                
+//             cout << numRows << endl;
+               
                //initialize first col
                string queryAligned = query->getAligned();
                for (int i = 0; i < numRows; i++) {
@@ -337,6 +348,7 @@ void Maligner::fillScoreMatrix(vector<vector<score_struct> >& ms, vector<Sequenc
                //fill rest of matrix
                for (int j = 1; j < numCols; j++) {  //iterate through matrix columns
                
+//                     for (int i = 0; i < 1; i++) {  //iterate through matrix rows
                        for (int i = 0; i < numRows; i++) {  //iterate through matrix rows
                                
                                string subjectAligned = seqs[i]->getAligned();
@@ -346,16 +358,13 @@ void Maligner::fillScoreMatrix(vector<vector<score_struct> >& ms, vector<Sequenc
                                if ((!isalpha(queryAligned[j])) && (!isalpha(subjectAligned[j]))) {
                                        //leave the same
                                }else if ((toupper(queryAligned[j]) == 'N') || (toupper(subjectAligned[j]) == 'N')) {
-                                       //matchMisMatchScore = matchScore;
                                        //leave the same
                                }else if (queryAligned[j] == subjectAligned[j]) {
                                        matchMisMatchScore = matchScore;
-//                                     ms[i][j].mismatches = ms[i][j-1].mismatches;
                                }else if (queryAligned[j] != subjectAligned[j]) {
                                        matchMisMatchScore = misMatchPenalty;
-//                                     ms[i][j].mismatches = ms[i][j-1].mismatches + 1;
                                }
-                               
+
                                //compute score based on previous columns scores
                                for (int prevIndex = 0; prevIndex < numRows; prevIndex++) { //iterate through rows
                                        
@@ -370,35 +379,33 @@ void Maligner::fillScoreMatrix(vector<vector<score_struct> >& ms, vector<Sequenc
                                                ms[i][j].prev = prevIndex;
                                        }
                                }
+//                             cout << i << '\t' << j << '\t' << queryAligned[j] << '\t' << subjectAligned[j] << '\t' << matchMisMatchScore << '\t' << ms[i][j].score << endl;
+
                        }
+                       
                }
                
-       /*      for(int i=0;i<numRows;i++){
-                       cout << seqs[i]->getName();
-                       for(int j=0;j<numCols;j++){
-                               cout << '\t' << ms[i][j].mismatches;
-                       }
-                       cout << endl;
-               }
-               cout << endl;*/
-               /*cout << numRows << '\t' << numCols << endl;
-               for(int i=0;i<numRows;i++){
-                       cout << seqs[i]->getName() << endl << seqs[i]->getAligned() << endl << endl;
-                       if ((seqs[i]->getName() == "S000003470") || (seqs[i]->getName() == "S000383265") || (seqs[i]->getName() == "7000004128191054")) {
-                       for(int j=0;j<numCols;j++){
-                               cout << '\t' << ms[i][j].score;
-                       }
-                       cout << endl;
-                       }
-               }
-               cout << endl;*/
-               /*for(int i=0;i<numRows;i++){
-                       cout << seqs[i]->getName();
-                       for(int j=0;j<numCols;j++){
-                               cout << '\t' << ms[i][j].prev;
-                       }
-                       cout << endl;
-               }*/
+               
+               
+               
+               
+//             cout << numRows << '\t' << numCols << endl;
+//             for(int i=0;i<numRows;i++){
+//                     cout << seqs[i]->getName();
+//                     for(int j=0;j<numCols;j++){
+//                             cout << '\t' << ms[i][j].score;
+//                     }
+//                     cout << endl;
+//             }
+//             cout << endl;
+//             
+//             for(int i=0;i<numRows;i++){
+//                     cout << seqs[i]->getName();
+//                     for(int j=0;j<numCols;j++){
+//                             cout << '\t' << ms[i][j].prev;
+//                     }
+//                     cout << endl;
+//             }
                
                
        }
@@ -429,6 +436,7 @@ vector<score_struct> Maligner::extractHighestPath(vector<vector<score_struct> >
                        }
                }
                
+//             cout << highestScore << endl;
                vector<score_struct> path;
                
                int rowIndex = highestStruct.row;
@@ -466,7 +474,8 @@ vector<trace_struct> Maligner::mapTraceRegionsToAlignment(vector<score_struct> p
                for (int i = 1; i < path.size(); i++) {
                        
                        int next_region_index = path[i].row;
-                       
+//                     cout << i << '\t' << next_region_index << endl;
+
                        if (next_region_index != region_index) {
                                
                                // add trace region
@@ -735,6 +744,8 @@ float Maligner::computePercentID(string queryAlign, string chimera) {
                
                if (numBases == 0) { return 0; }
        
+//             cout << numIdentical << '\t' << numBases << endl;
+               
                float percentIdentical = (numIdentical/(float)numBases) * 100;
                
 //             cout << percentIdentical << endl;