]> git.donarmstrong.com Git - mothur.git/blobdiff - chimerarealigner.cpp
fixes while testing 1.33.0
[mothur.git] / chimerarealigner.cpp
index f45b9a87f6829b68c81c8b137758ec3f6a096022..c95a1e7a4fee9efe7c821c5448fc44bf5dbd7a75 100644 (file)
 #include "nast.hpp"
 
 //***************************************************************************************************************
-ChimeraReAligner::ChimeraReAligner(vector<Sequence*> t, int m, int mm) : match(m), misMatch(mm) {  templateSeqs = t;  }
+ChimeraReAligner::ChimeraReAligner()  {  m = MothurOut::getInstance(); }
 //***************************************************************************************************************
 ChimeraReAligner::~ChimeraReAligner() {}       
 //***************************************************************************************************************
-void ChimeraReAligner::reAlign(Sequence* query, vector<results> parents) {
+void ChimeraReAligner::reAlign(Sequence* query, vector<string> parents) {
+
+       
        try {
-               if (parents.size() != 0) {
-                       vector<Sequence*> queryParts;
-                       vector<Sequence*> parentParts;  //queryParts[0] relates to parentParts[0]
+//             if (parents.size() != 0) {
+//                     vector<Sequence*> queryParts;
+//                     vector<Sequence*> parentParts;  //queryParts[0] relates to parentParts[0]
+//                     
+//                     string qAligned = query->getAligned();
+//                     string newQuery = "";
+//             
+//                     //sort parents by region start
+//                     sort(parents.begin(), parents.end(), compareRegionStart);
+//
+//                     //make sure you don't cutoff beginning of query 
+//                     if (parents[0].nastRegionStart > 0) {  newQuery += qAligned.substr(0, parents[0].nastRegionStart);  }
+//                     int longest = 0;
+//
+//                     //take query and break apart into pieces using breakpoints given by results of parents
+//                     for (int i = 0; i < parents.size(); i++) {
+//                             int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
+//                             string q = qAligned.substr(parents[i].nastRegionStart, length);
+//     
+//                             Sequence* queryFrag = new Sequence(query->getName(), q);
+//                             queryParts.push_back(queryFrag);
+//             
+//                             string p = parents[i].parentAligned;
+//                             p = p.substr(parents[i].nastRegionStart, length);
+//                             Sequence* parent = new Sequence(parents[i].parent, p);
+//                             parentParts.push_back(parent);
+//
+//                             if (queryFrag->getUnaligned().length() > longest)       { longest = queryFrag->getUnaligned().length(); }
+//                             if (parent->getUnaligned().length() > longest)  { longest = parent->getUnaligned().length();    }
+//                     }
+//
+//                     //align each peice to correct parent from results
+//                     for (int i = 0; i < queryParts.size(); i++) {
+//                             if ((queryParts[i]->getUnaligned() == "") || (parentParts[i]->getUnaligned() == "")) {;}
+//                             else {
+//                                     Alignment* alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, longest+1); //default gapopen, match, mismatch, longestbase
+//                             
+//                                     Nast nast(alignment, queryParts[i], parentParts[i]);
+//                                     delete alignment;
+//                             }
+//                     }
+//
+//                     //recombine pieces to form new query sequence
+//                     for (int i = 0; i < queryParts.size(); i++) {
+//                             //sometimes the parent regions do not meet, for example region 1 may end at 1000 and region 2 starts at 1100.  
+//                             //we don't want to loose length so in this case we will leave query alone
+//                             if (i != 0) {
+//                                     int space = parents[i].nastRegionStart - parents[i-1].nastRegionEnd - 1;
+//                                     if (space > 0) { //they don't meet and we need to add query piece
+//                                             string q = qAligned.substr(parents[i-1].nastRegionEnd+1, space);
+//                                             newQuery += q;
+//                                     }
+//                             }
+//
+//                             newQuery += queryParts[i]->getAligned();
+//                     }
+//                     
+//                     //make sure you don't cutoff end of query 
+//                     if (parents[parents.size()-1].nastRegionEnd < (qAligned.length()-1)) {  newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd+1);  }
+//                     
+//                     query->setAligned(newQuery);
+//
+//                     //free memory
+//                     for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i];  }
+//                     for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i];  }
+//                     
+//             } //else leave query alone, you have bigger problems...
+
+               if(parents.size() != 0){
+
+                       alignmentLength = query->getAlignLength();      //x
+                       int queryUnalignedLength = query->getNumBases();        //y
                        
-                       string qAligned = query->getAligned();
-                       string newQuery = "";
+                       buildTemplateProfile(parents);
                        
-                       //sort parents by region start
-                       sort(parents.begin(), parents.end(), compareRegionStart);
-
-                       //make sure you don't cutoff beginning of query 
-                       if (parents[0].nastRegionStart > 0) {  newQuery += qAligned.substr(0, parents[0].nastRegionStart);  }
-                       int longest = 0;
-                       //take query and break apart into pieces using breakpoints given by results of parents
-                       for (int i = 0; i < parents.size(); i++) {
-       cout << parents[i].parent << '\t' << parents[i].nastRegionStart <<  '\t' << parents[i].nastRegionEnd    << endl;        
-                               int length = parents[i].nastRegionEnd - parents[i].nastRegionStart;
-                               string q = qAligned.substr(parents[i].nastRegionStart, length);
-       cout << "query = " << q << endl;
-                               Sequence* queryFrag = new Sequence(query->getName(), q);
-                               
-                               queryParts.push_back(queryFrag);
-                               
-                               Sequence* parent = getSequence(parents[i].parent);
-                               string p = parent->getAligned();
+                       createAlignMatrix(queryUnalignedLength, alignmentLength);
+                       fillAlignMatrix(query->getUnaligned());
+                       query->setAligned(getNewAlignment(query->getUnaligned()));
+               }
                
-                               p = p.substr(parents[i].nastRegionStart, length);
-       cout << "parent = " << p << endl;               
-                               parent->setAligned(p);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraReAligner", "reAlign");
+               exit(1);
+       }
+}
+/***************************************************************************************************************/
+
+void ChimeraReAligner::buildTemplateProfile(vector<string> parents) {
+       try{    
+               int numParents = parents.size();
+
+               profile.resize(alignmentLength);
                                
-                               parentParts.push_back(parent);
+               for(int i=0;i<numParents;i++){
+                       string seq = parents[i];
+
+                       for(int j=0;j<alignmentLength;j++){
+                                       
+                               
+                               if(seq[j] == 'A')               {       profile[j].A++;         }
+                               else if(seq[j] == 'T')  {       profile[j].T++;         }
+                               else if(seq[j] == 'G')  {       profile[j].G++;         }
+                               else if(seq[j] == 'C')  {       profile[j].C++;         }
+                               else if(seq[j] == '-')  {       profile[j].Gap++;       }
+                               else if(seq[j] == '.')  {       profile[j].Gap++;       }
+//                             else                                    {       profile[j].A++;         }
                                
-                               if (q.length() > longest)       { longest = q.length(); }
-                               if (p.length() > longest)       { longest = p.length(); }
-                       }
-                                                                                       
-                       //align each peice to correct parent from results
-                       for (int i = 0; i < queryParts.size(); i++) {
-                               alignment = new NeedlemanOverlap(-2.0, match, misMatch, longest+1); //default gapopen, match, mismatch, longestbase
-                               Nast nast(alignment, queryParts[i], parentParts[i]);
-                               delete alignment;
-                       }
-                                                                                               
-                       //recombine pieces to form new query sequence
-                       for (int i = 0; i < queryParts.size(); i++) {
-                               newQuery += queryParts[i]->getAligned();
                        }
+               }
+               
+
+               for(int i=0;i<alignmentLength;i++){
+                       profile[i].Chars = profile[i].A + profile[i].T + profile[i].G + profile[i].C;
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraReAligner", "buildTemplateProfile");
+               exit(1);
+       }
+}
+       
+/***************************************************************************************************************/
+
+void ChimeraReAligner::createAlignMatrix(int queryUnalignedLength, int alignmentLength){
+       
+       try{
+               alignMatrix.resize(alignmentLength+1);
+               for(int i=0;i<=alignmentLength;i++){
+                       alignMatrix[i].resize(queryUnalignedLength+1);
+               }
+
+               for(int i=1;i<=alignmentLength;i++)             {       alignMatrix[i][0].direction = 'l';      }
+               for(int j=1;j<=queryUnalignedLength;j++){       alignMatrix[0][j].direction = 'u';      }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraReAligner", "createAlignMatrix");
+               exit(1);
+       }
+}
+
+/***************************************************************************************************************/
+
+void ChimeraReAligner::fillAlignMatrix(string query){
+       try{
+               int GAP = -4;
+               
+               int nrows = alignMatrix.size()-1;
+               int ncols = alignMatrix[0].size()-1;
+                               
+               for(int i=1;i<=nrows;i++){
                        
+                       bases p = profile[i-1];
+                       int numChars = p.Chars;
                        
-                       //make sure you don't cutoff end of query 
-                       if (parents[parents.size()-1].nastRegionEnd < qAligned.length()) {  newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd);  }
-                       
-                       //set query to new aligned string
-                       query->setAligned(newQuery);
-                       
-                       //free memory
-                       for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i];  }
-                       for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i];  }
+                       for(int j=1;j<=ncols;j++){
                        
-               } //else leave query alone, you have bigger problems...
+                               char q = query[j-1];
+                               
+                               //      score it for if there was a match
+                               int maxScore = calcMatchScore(p, q) + alignMatrix[i-1][j-1].score;
+                               int maxDirection = 'd';
+                               
+                               //      score it for if there was a gap in the query
+                               int score = alignMatrix[i-1][j].score + (numChars * GAP);
+                               if (score > maxScore) {
+                                       maxScore = score;
+                                       maxDirection = 'l';
+                               }
+                               
+                               alignMatrix[i][j].score = maxScore;
+                               alignMatrix[i][j].direction = maxDirection;
+                               
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraReAligner", "fillAlignMatrix");
+               exit(1);
+       }
+}
+
+/***************************************************************************************************************/
+
+int ChimeraReAligner::calcMatchScore(bases p, char q){
+       try{
                
+               int MATCH = 5;
+               int MISMATCH = -4;
+               
+               int score = 0;
+               
+               if(q == 'G')            {       score = (MATCH * p.G + MISMATCH * (p.A + p.T + p.C + p.Gap));           }
+               else if(q == 'A')       {       score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));           }
+               else if(q == 'T')       {       score = (MATCH * p.T + MISMATCH * (p.G + p.A + p.C + p.Gap));           }
+               else if(q == 'C')       {       score = (MATCH * p.C + MISMATCH * (p.G + p.A + p.T + p.Gap));           }
+               else                            {       score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));           }
+
+               return score;
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraReAligner", "reAlign");
+               m->errorOut(e, "ChimeraReAligner", "calcMatchScore");
                exit(1);
        }
 }
-//***************************************************************************************************************
-Sequence* ChimeraReAligner::getSequence(string name) {
+
+/***************************************************************************************************************/
+
+string ChimeraReAligner::getNewAlignment(string query){
        try{
-               Sequence* temp;
+               string queryAlignment(alignmentLength, '.');
+               string referenceAlignment(alignmentLength, '.');
                
-               //look through templateSeqs til you find it
-               int spot = -1;
-               for (int i = 0; i < templateSeqs.size(); i++) {
-                       if (name == templateSeqs[i]->getName()) {  
-                               spot = i;
-                               break;
+               
+               int maxScore = -99999999;
+               
+               int nrows = alignMatrix.size()-1;
+               int ncols = alignMatrix[0].size()-1;
+
+               int bestCol = -1;
+               int bestRow = -1;
+               
+               for(int i=1;i<=nrows;i++){
+                       int score = alignMatrix[i][ncols].score;
+                       if (score > maxScore) {
+                               maxScore = score;
+                               bestRow = i;
+                               bestCol = ncols;
                        }
                }
                
-               if(spot == -1) { mothurOut("Error: Could not find sequence."); mothurOutEndLine(); return NULL; }
+               for(int j=1;j<=ncols;j++){
+                       int score = alignMatrix[nrows][j].score;
+                       if (score > maxScore) {
+                               maxScore = score;
+                               bestRow = nrows;
+                               bestCol = j;
+                       }
+               }
+               
+               int currentRow = bestRow;
+               int currentCol = bestCol;
                
-               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+               int alignmentPosition = 0;
+               if(currentRow < alignmentLength){
+                       for(int i=alignmentLength;i>currentRow;i--){
+                               alignmentPosition++;
+                       }
+               }
                
-               return temp;
+               AlignCell c = alignMatrix[currentRow][currentCol];
+               while(c.direction != 'x'){
+                       
+                       char q;
+
+                       if(c.direction == 'd'){
+                               q = query[currentCol-1];
+                               currentCol--;
+                               currentRow--;
+                       }
+                       
+                       
+                       else if (c.direction == 'u') {
+                               break;
+                       }                                       
+                       else if(c.direction == 'l'){
+                               char gapChar;
+                               if(currentCol == 0)     {       gapChar = '.';  }
+                               else                            {       gapChar = '-';  }
+                                                               
+                               q = gapChar;
+                               currentRow--;
+                       }
+                       else{
+                               cout << "you shouldn't be here..." << endl;
+                       }
+
+                       queryAlignment[alignmentPosition] = q;
+                       alignmentPosition++;
+                       c = alignMatrix[currentRow][currentCol];
+               }
+
+//             need to reverse the string
+               string flipSeq = "";
+               for(int i=alignmentLength-1;i>=0;i--){
+                       flipSeq += queryAlignment[i];                   
+               }
+
+               return flipSeq;
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraReAligner", "getSequence");
+               m->errorOut(e, "ChimeraReAligner", "getNewAlignment");
                exit(1);
        }
 }
-//***************************************************************************************************************
+
+/***************************************************************************************************************/
+
+// Sequence* ChimeraReAligner::getSequence(string name) {
+//     try{
+//             Sequence* temp;
+//             
+//             //look through templateSeqs til you find it
+//             int spot = -1;
+//             for (int i = 0; i < templateSeqs.size(); i++) {
+//                     if (name == templateSeqs[i]->getName()) {  
+//                             spot = i;
+//                             break;
+//                     }
+//             }
+//             
+//             if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
+//             
+//             temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+//             
+//             return temp;
+//     }
+//     catch(exception& e) {
+//             m->errorOut(e, "ChimeraReAligner", "getSequence");
+//             exit(1);
+//     }
+//}
+
+//***************************************************************************************************************/