]> git.donarmstrong.com Git - mothur.git/blob - chimerarealigner.cpp
changed random forest output filename
[mothur.git] / chimerarealigner.cpp
1 /*
2  *  chimerarealigner.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 2/12/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerarealigner.h"
11 #include "needlemanoverlap.hpp"
12 #include "nast.hpp"
13
14 //***************************************************************************************************************
15 ChimeraReAligner::ChimeraReAligner()  {  m = MothurOut::getInstance(); }
16 //***************************************************************************************************************
17 ChimeraReAligner::~ChimeraReAligner() {}        
18 //***************************************************************************************************************
19 void ChimeraReAligner::reAlign(Sequence* query, vector<string> parents) {
20
21         
22         try {
23 //              if (parents.size() != 0) {
24 //                      vector<Sequence*> queryParts;
25 //                      vector<Sequence*> parentParts;  //queryParts[0] relates to parentParts[0]
26 //                      
27 //                      string qAligned = query->getAligned();
28 //                      string newQuery = "";
29 //              
30 //                      //sort parents by region start
31 //                      sort(parents.begin(), parents.end(), compareRegionStart);
32 //
33 //                      //make sure you don't cutoff beginning of query 
34 //                      if (parents[0].nastRegionStart > 0) {  newQuery += qAligned.substr(0, parents[0].nastRegionStart);  }
35 //                      int longest = 0;
36 //
37 //                      //take query and break apart into pieces using breakpoints given by results of parents
38 //                      for (int i = 0; i < parents.size(); i++) {
39 //                              int length = parents[i].nastRegionEnd - parents[i].nastRegionStart+1;
40 //                              string q = qAligned.substr(parents[i].nastRegionStart, length);
41 //      
42 //                              Sequence* queryFrag = new Sequence(query->getName(), q);
43 //                              queryParts.push_back(queryFrag);
44 //              
45 //                              string p = parents[i].parentAligned;
46 //                              p = p.substr(parents[i].nastRegionStart, length);
47 //                              Sequence* parent = new Sequence(parents[i].parent, p);
48 //                              parentParts.push_back(parent);
49 //
50 //                              if (queryFrag->getUnaligned().length() > longest)       { longest = queryFrag->getUnaligned().length(); }
51 //                              if (parent->getUnaligned().length() > longest)  { longest = parent->getUnaligned().length();    }
52 //                      }
53 //
54 //                      //align each peice to correct parent from results
55 //                      for (int i = 0; i < queryParts.size(); i++) {
56 //                              if ((queryParts[i]->getUnaligned() == "") || (parentParts[i]->getUnaligned() == "")) {;}
57 //                              else {
58 //                                      Alignment* alignment = new NeedlemanOverlap(-2.0, 1.0, -1.0, longest+1); //default gapopen, match, mismatch, longestbase
59 //                              
60 //                                      Nast nast(alignment, queryParts[i], parentParts[i]);
61 //                                      delete alignment;
62 //                              }
63 //                      }
64 //
65 //                      //recombine pieces to form new query sequence
66 //                      for (int i = 0; i < queryParts.size(); i++) {
67 //                              //sometimes the parent regions do not meet, for example region 1 may end at 1000 and region 2 starts at 1100.  
68 //                              //we don't want to loose length so in this case we will leave query alone
69 //                              if (i != 0) {
70 //                                      int space = parents[i].nastRegionStart - parents[i-1].nastRegionEnd - 1;
71 //                                      if (space > 0) { //they don't meet and we need to add query piece
72 //                                              string q = qAligned.substr(parents[i-1].nastRegionEnd+1, space);
73 //                                              newQuery += q;
74 //                                      }
75 //                              }
76 //
77 //                              newQuery += queryParts[i]->getAligned();
78 //                      }
79 //                      
80 //                      //make sure you don't cutoff end of query 
81 //                      if (parents[parents.size()-1].nastRegionEnd < (qAligned.length()-1)) {  newQuery += qAligned.substr(parents[parents.size()-1].nastRegionEnd+1);  }
82 //                      
83 //                      query->setAligned(newQuery);
84 //
85 //                      //free memory
86 //                      for (int i = 0; i < queryParts.size(); i++) { delete queryParts[i];  }
87 //                      for (int i = 0; i < parentParts.size(); i++) { delete parentParts[i];  }
88 //                      
89 //              } //else leave query alone, you have bigger problems...
90
91                 if(parents.size() != 0){
92
93                         alignmentLength = query->getAlignLength();      //x
94                         int queryUnalignedLength = query->getNumBases();        //y
95                         
96                         buildTemplateProfile(parents);
97                         
98                         createAlignMatrix(queryUnalignedLength, alignmentLength);
99                         fillAlignMatrix(query->getUnaligned());
100                         query->setAligned(getNewAlignment(query->getUnaligned()));
101                 }
102                 
103         }
104         catch(exception& e) {
105                 m->errorOut(e, "ChimeraReAligner", "reAlign");
106                 exit(1);
107         }
108 }
109 /***************************************************************************************************************/
110
111 void ChimeraReAligner::buildTemplateProfile(vector<string> parents) {
112         try{    
113                 int numParents = parents.size();
114
115                 profile.resize(alignmentLength);
116                                 
117                 for(int i=0;i<numParents;i++){
118                         string seq = parents[i];
119
120                         for(int j=0;j<alignmentLength;j++){
121                                         
122                                 
123                                 if(seq[j] == 'A')               {       profile[j].A++;         }
124                                 else if(seq[j] == 'T')  {       profile[j].T++;         }
125                                 else if(seq[j] == 'G')  {       profile[j].G++;         }
126                                 else if(seq[j] == 'C')  {       profile[j].C++;         }
127                                 else if(seq[j] == '-')  {       profile[j].Gap++;       }
128                                 else if(seq[j] == '.')  {       profile[j].Gap++;       }
129 //                              else                                    {       profile[j].A++;         }
130                                 
131                         }
132                 }
133                 
134
135                 for(int i=0;i<alignmentLength;i++){
136                         profile[i].Chars = profile[i].A + profile[i].T + profile[i].G + profile[i].C;
137                 }
138                 
139         }
140         catch(exception& e) {
141                 m->errorOut(e, "ChimeraReAligner", "buildTemplateProfile");
142                 exit(1);
143         }
144 }
145         
146  
147 /***************************************************************************************************************/
148
149 void ChimeraReAligner::createAlignMatrix(int queryUnalignedLength, int alignmentLength){
150         
151         try{
152                 alignMatrix.resize(alignmentLength+1);
153                 for(int i=0;i<=alignmentLength;i++){
154                         alignMatrix[i].resize(queryUnalignedLength+1);
155                 }
156
157                 for(int i=1;i<=alignmentLength;i++)             {       alignMatrix[i][0].direction = 'l';      }
158                 for(int j=1;j<=queryUnalignedLength;j++){       alignMatrix[0][j].direction = 'u';      }
159         }
160         catch(exception& e) {
161                 m->errorOut(e, "ChimeraReAligner", "createAlignMatrix");
162                 exit(1);
163         }
164 }
165
166 /***************************************************************************************************************/
167
168 void ChimeraReAligner::fillAlignMatrix(string query){
169         try{
170                 int GAP = -4;
171                 
172                 int nrows = alignMatrix.size()-1;
173                 int ncols = alignMatrix[0].size()-1;
174                                 
175                 for(int i=1;i<=nrows;i++){
176                         
177                         bases p = profile[i-1];
178                         int numChars = p.Chars;
179                         
180                         for(int j=1;j<=ncols;j++){
181                         
182                                 char q = query[j-1];
183                                 
184                                 //      score it for if there was a match
185                                 int maxScore = calcMatchScore(p, q) + alignMatrix[i-1][j-1].score;
186                                 int maxDirection = 'd';
187                                 
188                                 //      score it for if there was a gap in the query
189                                 int score = alignMatrix[i-1][j].score + (numChars * GAP);
190                                 if (score > maxScore) {
191                                         maxScore = score;
192                                         maxDirection = 'l';
193                                 }
194                                 
195                                 alignMatrix[i][j].score = maxScore;
196                                 alignMatrix[i][j].direction = maxDirection;
197                                 
198                         }
199                 }
200         }
201         catch(exception& e) {
202                 m->errorOut(e, "ChimeraReAligner", "fillAlignMatrix");
203                 exit(1);
204         }
205 }
206
207 /***************************************************************************************************************/
208
209 int ChimeraReAligner::calcMatchScore(bases p, char q){
210         try{
211                 
212                 int MATCH = 5;
213                 int MISMATCH = -4;
214                 
215                 int score = 0;
216                 
217                 if(q == 'G')            {       score = (MATCH * p.G + MISMATCH * (p.A + p.T + p.C + p.Gap));           }
218                 else if(q == 'A')       {       score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));           }
219                 else if(q == 'T')       {       score = (MATCH * p.T + MISMATCH * (p.G + p.A + p.C + p.Gap));           }
220                 else if(q == 'C')       {       score = (MATCH * p.C + MISMATCH * (p.G + p.A + p.T + p.Gap));           }
221                 else                            {       score = (MATCH * p.A + MISMATCH * (p.G + p.T + p.C + p.Gap));           }
222
223                 return score;
224         }
225         catch(exception& e) {
226                 m->errorOut(e, "ChimeraReAligner", "calcMatchScore");
227                 exit(1);
228         }
229 }
230
231 /***************************************************************************************************************/
232
233 string ChimeraReAligner::getNewAlignment(string query){
234         try{
235                 string queryAlignment(alignmentLength, '.');
236                 string referenceAlignment(alignmentLength, '.');
237                 
238                 
239                 int maxScore = -99999999;
240                 
241                 int nrows = alignMatrix.size()-1;
242                 int ncols = alignMatrix[0].size()-1;
243
244                 int bestCol = -1;
245                 int bestRow = -1;
246                 
247                 for(int i=1;i<=nrows;i++){
248                         int score = alignMatrix[i][ncols].score;
249                         if (score > maxScore) {
250                                 maxScore = score;
251                                 bestRow = i;
252                                 bestCol = ncols;
253                         }
254                 }
255                 
256                 for(int j=1;j<=ncols;j++){
257                         int score = alignMatrix[nrows][j].score;
258                         if (score > maxScore) {
259                                 maxScore = score;
260                                 bestRow = nrows;
261                                 bestCol = j;
262                         }
263                 }
264                 
265                 int currentRow = bestRow;
266                 int currentCol = bestCol;
267                 
268                 int alignmentPosition = 0;
269                 if(currentRow < alignmentLength){
270                         for(int i=alignmentLength;i>currentRow;i--){
271                                 alignmentPosition++;
272                         }
273                 }
274                 
275                 AlignCell c = alignMatrix[currentRow][currentCol];
276                 while(c.direction != 'x'){
277                         
278                         char q;
279
280                         if(c.direction == 'd'){
281                                 q = query[currentCol-1];
282                                 currentCol--;
283                                 currentRow--;
284                         }
285                         
286                         
287                         else if (c.direction == 'u') {
288                                 break;
289                         }                                       
290                         else if(c.direction == 'l'){
291                                 char gapChar;
292                                 if(currentCol == 0)     {       gapChar = '.';  }
293                                 else                            {       gapChar = '-';  }
294                                                                 
295                                 q = gapChar;
296                                 currentRow--;
297                         }
298                         else{
299                                 cout << "you shouldn't be here..." << endl;
300                         }
301
302                         queryAlignment[alignmentPosition] = q;
303                         alignmentPosition++;
304                         c = alignMatrix[currentRow][currentCol];
305                 }
306
307 //              need to reverse the string
308                 string flipSeq = "";
309                 for(int i=alignmentLength-1;i>=0;i--){
310                         flipSeq += queryAlignment[i];                   
311                 }
312
313                 return flipSeq;
314         }
315         catch(exception& e) {
316                 m->errorOut(e, "ChimeraReAligner", "getNewAlignment");
317                 exit(1);
318         }
319 }
320
321 /***************************************************************************************************************/
322
323 // Sequence* ChimeraReAligner::getSequence(string name) {
324 //      try{
325 //              Sequence* temp;
326 //              
327 //              //look through templateSeqs til you find it
328 //              int spot = -1;
329 //              for (int i = 0; i < templateSeqs.size(); i++) {
330 //                      if (name == templateSeqs[i]->getName()) {  
331 //                              spot = i;
332 //                              break;
333 //                      }
334 //              }
335 //              
336 //              if(spot == -1) { m->mothurOut("Error: Could not find sequence."); m->mothurOutEndLine(); return NULL; }
337 //              
338 //              temp = new Sequence(templateSeqs[spot]->getName(), templateSeqs[spot]->getAligned());
339 //              
340 //              return temp;
341 //      }
342 //      catch(exception& e) {
343 //              m->errorOut(e, "ChimeraReAligner", "getSequence");
344 //              exit(1);
345 //      }
346 //}
347
348 //***************************************************************************************************************/