]> git.donarmstrong.com Git - mothur.git/commitdiff
incorporation of nast-ier code
authorpschloss <pschloss>
Tue, 3 May 2011 18:43:48 +0000 (18:43 +0000)
committerpschloss <pschloss>
Tue, 3 May 2011 18:43:48 +0000 (18:43 +0000)
chimerarealigner.cpp
chimerarealigner.h
chimeraslayer.cpp

index 8615fbb931b1d4d05fd3ae656b59407c91d94939..82c4427fb6a9ae4c635b44d64e13a6254c351aa7 100644 (file)
@@ -16,105 +16,334 @@ ChimeraReAligner::ChimeraReAligner()  {  m = MothurOut::getInstance(); }
 //***************************************************************************************************************
 ChimeraReAligner::~ChimeraReAligner() {}       
 //***************************************************************************************************************
-void ChimeraReAligner::reAlign(Sequence* query, vector<results> parents) {
+void ChimeraReAligner::reAlign(Sequence* query, vector<string> parents) {
+
+       
        try {
-               if (parents.size() != 0) {
-                       vector<Sequence*> queryParts;
-                       vector<Sequence*> parentParts;  //queryParts[0] relates to parentParts[0]
-                       
-                       string qAligned = query->getAligned();
-                       string newQuery = "";
-               
-                       //sort parents by region start
-                       sort(parents.begin(), parents.end(), compareRegionStart);
+//             if (parents.size() != 0) {
+//                     vector<Sequence*> queryParts;
+//                     vector<Sequence*> parentParts;  //queryParts[0] relates to parentParts[0]
+//                     
+//                     string qAligned = query->getAligned();
+//                     string newQuery = "";
+//             
+//                     //sort parents by region start
+//                     sort(parents.begin(), parents.end(), compareRegionStart);
+//
+//                     //make sure you don't cutoff beginning of query 
+//                     if (parents[0].nastRegionStart > 0) {  newQuery += qAligned.substr(0, parents[0].nastRegionStart);  }
+//                     int longest = 0;
+//
+//                     //take query and break apart into pieces using breakpoints given by results of parents
+//                     for (int i = 0; i < parents.size(); i++) {
+//                             int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
+//                             string q = qAligned.substr(parents[i].nastRegionStart, length);
+//     
+//                             Sequence* queryFrag = new Sequence(query->getName(), q);
+//                             queryParts.push_back(queryFrag);
+//             
+//                             string p = parents[i].parentAligned;
+//                             p = p.substr(parents[i].nastRegionStart, length);
+//                             Sequence* parent = new Sequence(parents[i].parent, p);
+//                             parentParts.push_back(parent);
+//
+//                             if (queryFrag->getUnaligned().length() > longest)       { longest = queryFrag->getUnaligned().length(); }
+//                             if (parent->getUnaligned().length() > longest)  { longest = parent->getUnaligned().length();    }
+//                     }
+//
+//                     //align each peice to correct parent from results
+//                     for (int i = 0; i < queryParts.size(); i++) {
+//                             if ((queryParts[i]->getUnaligned() == "") || (parentParts[i]->getUnaligned() == "")) {;}
+//                             else {
+//                                     Alignment* alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, longest+1); //default gapopen, match, mismatch, longestbase
+//                             
+//                                     Nast nast(alignment, queryParts[i], parentParts[i]);
+//                                     delete alignment;
+//                             }
+//                     }
+//
+//                     //recombine pieces to form new query sequence
+//                     for (int i = 0; i < queryParts.size(); i++) {
+//                             //sometimes the parent regions do not meet, for example region 1 may end at 1000 and region 2 starts at 1100.  
+//                             //we don't want to loose length so in this case we will leave query alone
+//                             if (i != 0) {
+//                                     int space = parents[i].nastRegionStart - parents[i-1].nastRegionEnd - 1;
+//                                     if (space > 0) { //they don't meet and we need to add query piece
+//                                             string q = qAligned.substr(parents[i-1].nastRegionEnd+1, space);
+//                                             newQuery += q;
+//                                     }
+//                             }
+//
+//                             newQuery += queryParts[i]->getAligned();
+//                     }
+//                     
+//                     //make sure you don't cutoff end of query 
+//                     if (parents[parents.size()-1].nastRegionEnd < (qAligned.length()-1)) {  newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd+1);  }
+//                     
+//                     query->setAligned(newQuery);
+//
+//                     //free memory
+//                     for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i];  }
+//                     for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i];  }
+//                     
+//             } //else leave query alone, you have bigger problems...
 
-                       //make sure you don't cutoff beginning of query 
-                       if (parents[0].nastRegionStart > 0) {  newQuery += qAligned.substr(0, parents[0].nastRegionStart);  }
-                       int longest = 0;
+               if(parents.size() != 0){
 
-                       //take query and break apart into pieces using breakpoints given by results of parents
-                       for (int i = 0; i < parents.size(); i++) {
-                               int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
-                               string q = qAligned.substr(parents[i].nastRegionStart, length);
-       
-                               Sequence* queryFrag = new Sequence(query->getName(), q);
-                               queryParts.push_back(queryFrag);
+                       alignmentLength = query->getAlignLength();      //x
+                       int queryUnalignedLength = query->getNumBases();        //y
+                       
+                       
+                       buildTemplateProfile(parents);
+                       
+                       createAlignMatrix(queryUnalignedLength, alignmentLength);
+                       fillAlignMatrix(query->getUnaligned());
+                       query->setAligned(getNewAlignment(query->getUnaligned()));
+               }
                
-                               string p = parents[i].parentAligned;
-                               p = p.substr(parents[i].nastRegionStart, length);
-                               Sequence* parent = new Sequence(parents[i].parent, p);
-                               parentParts.push_back(parent);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraReAligner", "reAlign");
+               exit(1);
+       }
+}
+/***************************************************************************************************************/
 
-                               if (queryFrag->getUnaligned().length() > longest)       { longest = queryFrag->getUnaligned().length(); }
-                               if (parent->getUnaligned().length() > longest)  { longest = parent->getUnaligned().length();    }
-                       }
+void ChimeraReAligner::buildTemplateProfile(vector<string> parents) {
+       try{    
+               int numParents = parents.size();
 
-                       //align each peice to correct parent from results
-                       for (int i = 0; i < queryParts.size(); i++) {
-                               if ((queryParts[i]->getUnaligned() == "") || (parentParts[i]->getUnaligned() == "")) {;}
-                               else {
-                                       Alignment* alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, longest+1); //default gapopen, match, mismatch, longestbase
+               profile.resize(alignmentLength);
+                               
+               for(int i=0;i<numParents;i++){
+                       string seq = parents[i];
+
+                       for(int j=0;j<alignmentLength;j++){
+                                       
+                               
+                               if(seq[j] == 'A')               {       profile[j].A++;         }
+                               else if(seq[j] == 'T')  {       profile[j].T++;         }
+                               else if(seq[j] == 'G')  {       profile[j].G++;         }
+                               else if(seq[j] == 'C')  {       profile[j].C++;         }
+                               else if(seq[j] == '-')  {       profile[j].Gap++;       }
+                               else if(seq[j] == '.')  {       profile[j].Gap++;       }
+                               else                                    {       profile[j].A++;         }
                                
-                                       Nast nast(alignment, queryParts[i], parentParts[i]);
-                                       delete alignment;
-                               }
                        }
+               }
+               
 
-                       //recombine pieces to form new query sequence
-                       for (int i = 0; i < queryParts.size(); i++) {
-                               //sometimes the parent regions do not meet, for example region 1 may end at 1000 and region 2 starts at 1100.  
-                               //we don't want to loose length so in this case we will leave query alone
-                               if (i != 0) {
-                                       int space = parents[i].nastRegionStart - parents[i-1].nastRegionEnd - 1;
-                                       if (space > 0) { //they don't meet and we need to add query piece
-                                               string q = qAligned.substr(parents[i-1].nastRegionEnd+1, space);
-                                               newQuery += q;
-                                       }
-                               }
+               for(int i=0;i<alignmentLength;i++){
+                       profile[i].Chars = profile[i].A + profile[i].T + profile[i].G + profile[i].C;
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraReAligner", "buildTemplateProfile");
+               exit(1);
+       }
+}
+       
+/***************************************************************************************************************/
 
-                               newQuery += queryParts[i]->getAligned();
-                       }
+void ChimeraReAligner::createAlignMatrix(int queryUnalignedLength, int alignmentLength){
+       
+       try{
+               alignMatrix.resize(alignmentLength+1);
+               for(int i=0;i<=alignmentLength;i++){
+                       alignMatrix[i].resize(queryUnalignedLength+1);
+               }
+
+               for(int i=1;i<=alignmentLength;i++)             {       alignMatrix[i][0].direction = 'l';      }
+               for(int j=1;j<=queryUnalignedLength;j++){       alignMatrix[0][j].direction = 'u';      }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraReAligner", "createAlignMatrix");
+               exit(1);
+       }
+}
+
+/***************************************************************************************************************/
+
+void ChimeraReAligner::fillAlignMatrix(string query){
+       try{
+               int GAP = -4;
+               
+               int nrows = alignMatrix.size()-1;
+               int ncols = alignMatrix[0].size()-1;
+                               
+               for(int i=1;i<=nrows;i++){
                        
-                       //make sure you don't cutoff end of query 
-                       if (parents[parents.size()-1].nastRegionEnd < (qAligned.length()-1)) {  newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd+1);  }
+                       bases p = profile[i-1];
+                       int numChars = p.Chars;
                        
-                       query->setAligned(newQuery);
-
-                       //free memory
-                       for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i];  }
-                       for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i];  }
+                       for(int j=1;j<=ncols;j++){
                        
-               } //else leave query alone, you have bigger problems...
+                               char q = query[j-1];
+                               
+                               //      score it for if there was a match
+                               int maxScore = calcMatchScore(p, q) + alignMatrix[i-1][j-1].score;
+                               int maxDirection = 'd';
+                               
+                               //      score it for if there was a gap in the query
+                               int score = alignMatrix[i-1][j].score + (numChars * GAP);
+                               if (score > maxScore) {
+                                       maxScore = score;
+                                       maxDirection = 'l';
+                               }
+                               
+                               alignMatrix[i][j].score = maxScore;
+                               alignMatrix[i][j].direction = maxDirection;
+                               
+                       }
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraReAligner", "fillAlignMatrix");
+               exit(1);
+       }
+}
+
+/***************************************************************************************************************/
+
+int ChimeraReAligner::calcMatchScore(bases p, char q){
+       try{
                
+               int MATCH = 5;
+               int MISMATCH = -4;
+               
+               int score = 0;
+               
+               if(q == 'G')            {       score = (MATCH * p.G + MISMATCH * (p.A + p.T + p.C + p.Gap));           }
+               else if(q == 'A')       {       score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));           }
+               else if(q == 'T')       {       score = (MATCH * p.T + MISMATCH * (p.G + p.A + p.C + p.Gap));           }
+               else if(q == 'C')       {       score = (MATCH * p.C + MISMATCH * (p.G + p.A + p.T + p.Gap));           }
+               else                            {       score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));           }
+
+               return score;
        }
        catch(exception& e) {
-               m->errorOut(e, "ChimeraReAligner", "reAlign");
+               m->errorOut(e, "ChimeraReAligner", "calcMatchScore");
                exit(1);
        }
 }
-/***************************************************************************************************************
-Sequence* ChimeraReAligner::getSequence(string name) {
+
+/***************************************************************************************************************/
+
+string ChimeraReAligner::getNewAlignment(string query){
        try{
-               Sequence* temp;
+               string queryAlignment(alignmentLength, '.');
+               string referenceAlignment(alignmentLength, '.');
                
-               //look through templateSeqs til you find it
-               int spot = -1;
-               for (int i = 0; i < templateSeqs.size(); i++) {
-                       if (name == templateSeqs[i]->getName()) {  
-                               spot = i;
-                               break;
+               
+               int maxScore = -99999999;
+               
+               int nrows = alignMatrix.size()-1;
+               int ncols = alignMatrix[0].size()-1;
+
+               int bestCol = -1;
+               int bestRow = -1;
+               
+               for(int i=1;i<=nrows;i++){
+                       int score = alignMatrix[i][ncols].score;
+                       if (score > maxScore) {
+                               maxScore = score;
+                               bestRow = i;
+                               bestCol = ncols;
+                       }
+               }
+               
+               for(int j=1;j<=ncols;j++){
+                       int score = alignMatrix[nrows][j].score;
+                       if (score > maxScore) {
+                               maxScore = score;
+                               bestRow = nrows;
+                               bestCol = j;
                        }
                }
                
-               if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
+               int currentRow = bestRow;
+               int currentCol = bestCol;
                
-               temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+               int alignmentPosition = 0;
+               if(currentRow < alignmentLength){
+                       for(int i=alignmentLength;i>currentRow;i--){
+                               alignmentPosition++;
+                       }
+               }
                
-               return temp;
+               AlignCell c = alignMatrix[currentRow][currentCol];
+               while(c.direction != 'x'){
+                       
+                       char q;
+
+                       if(c.direction == 'd'){
+                               q = query[currentCol-1];
+                               currentCol--;
+                               currentRow--;
+                       }
+                       
+                       
+                       else if (c.direction == 'u') {
+                               break;
+                       }                                       
+                       else if(c.direction == 'l'){
+                               char gapChar;
+                               if(currentCol == 0)     {       gapChar = '.';  }
+                               else                            {       gapChar = '-';  }
+                                                               
+                               q = gapChar;
+                               currentRow--;
+                       }
+                       else{
+                               cout << "you shouldn't be here..." << endl;
+                       }
+
+                       queryAlignment[alignmentPosition] = q;
+                       alignmentPosition++;
+                       c = alignMatrix[currentRow][currentCol];
+               }
+
+//             need to reverse the string
+               string flipSeq = "";
+               for(int i=alignmentLength-1;i>=0;i--){
+                       flipSeq += queryAlignment[i];                   
+               }
+               
+               return flipSeq;
        }
        catch(exception& e) {
-               m->errorOut(e, "ChimeraReAligner", "getSequence");
+               m->errorOut(e, "ChimeraReAligner", "getNewAlignment");
                exit(1);
        }
 }
+
+/***************************************************************************************************************/
+
+// Sequence* ChimeraReAligner::getSequence(string name) {
+//     try{
+//             Sequence* temp;
+//             
+//             //look through templateSeqs til you find it
+//             int spot = -1;
+//             for (int i = 0; i < templateSeqs.size(); i++) {
+//                     if (name == templateSeqs[i]->getName()) {  
+//                             spot = i;
+//                             break;
+//                     }
+//             }
+//             
+//             if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
+//             
+//             temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
+//             
+//             return temp;
+//     }
+//     catch(exception& e) {
+//             m->errorOut(e, "ChimeraReAligner", "getSequence");
+//             exit(1);
+//     }
+//}
+
 //***************************************************************************************************************/
index cf7115804122d202c62903b31664cb7db1b059e3..5809f42997381ab53ba5aaeb43a6c0d0b5cb6619 100644 (file)
 
 /***********************************************************/
 
+struct AlignCell {
+       int score;
+       char direction;
+       AlignCell() : score(0), direction('x') {};
+};
+
+/***********************************************************/
+
+struct  bases {
+       int A, T, G, C, Gap, Chars;
+       bases() : A(0), T(0), G(0), C(0), Gap(0), Chars(0){};
+};
+
+/***********************************************************/
+
+
 class ChimeraReAligner  {
        
-       public:
-               ChimeraReAligner();      
-               ~ChimeraReAligner();
-               
-               void reAlign(Sequence*, vector<results>);
+public:
+       ChimeraReAligner();      
+       ~ChimeraReAligner();
+       
+       void reAlign(Sequence*, vector<string>);
                                
-       private:
-               Sequence* querySeq;
-               MothurOut* m;
+private:
+       void buildTemplateProfile(vector<string>);
+       void createAlignMatrix(int, int);
+       void fillAlignMatrix(string);
+       int calcMatchScore(bases, char);
+       string getNewAlignment(string);
+
+       int alignmentLength;
+       vector<bases> profile;
+       vector<vector<AlignCell> > alignMatrix;
+
+       MothurOut* m;
 };
+
 /***********************************************************/
 
 #endif
index 1f7160aa0d91e8f049fbca4c8b5a85c0bed349d9..59b2fea4fa895a10ffe6ff9f462c448c7fa88014 100644 (file)
@@ -763,22 +763,18 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
                
                if (chimeraFlag == "yes") {
-               
+
                        if (realign) {
-                               vector<Sequence*> parents;
+                               vector<string> parents;
                                for (int i = 0; i < Results.size(); i++) {
-cout << Results[i].parent  << '\t' << Results[i].nastRegionStart << '\t' << Results[i].nastRegionEnd  << endl;
-                                       Sequence* parent = new Sequence(Results[i].parent, Results[i].parentAligned);
-                                       
-                                       parents.push_back(parent);
+                                       parents.push_back(Results[i].parentAligned);
                                }
                                
-                               ChimeraReAligner realigner;
-                               //realigner.reAlign(query, parents);
-                               
-                               for (int i = 0; i < parents.size(); i++) { delete parents[i]; }
+                               ChimeraReAligner realigner;             
+                               realigner.reAlign(query, parents);
+
                        }
-       //query->printSequence(cout);
+
                        //get sequence that were given from maligner results
                        vector<SeqDist> seqs;
                        map<string, float> removeDups;