]> git.donarmstrong.com Git - mothur.git/blob - myseqdist.cpp
working on pam
[mothur.git] / myseqdist.cpp
1 /*
2  *  pds.seqdist.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 8/12/11.
6  *  Copyright 2011 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "myseqdist.h"
11 #include "sequence.hpp"
12
13 /**************************************************************************************************/
14 correctDist::correctDist(int p) : processors(p) {
15         try {
16                 m = MothurOut::getInstance();
17         }
18         catch(exception& e) {
19                 m->errorOut(e, "correctDist", "correctDist");
20                 exit(1);
21         }
22 }
23 /**************************************************************************************************/
24 correctDist::correctDist(string sequenceFileName, int p) : processors(p) {
25         try {
26                 m = MothurOut::getInstance();
27                 getSequences(sequenceFileName);
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "correctDist", "correctDist");
31                 exit(1);
32         }
33 }
34 /**************************************************************************************************/
35 int correctDist::addSeq(string seqName, string seqSeq){
36         try {
37                 names.push_back(seqName);
38                 sequences.push_back(fixSequence(seqSeq));
39                 return 0;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "correctDist", "addSeq");
43                 exit(1);
44         }
45 }
46 /**************************************************************************************************/
47 int correctDist::execute(string distanceFileName){
48         try {
49 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
50 #else
51                 processors = 1;
52 #endif
53                 correctMatrix.resize(4);
54                 for(int i=0;i<4;i++){   correctMatrix[i].resize(4);     }
55                 
56                 correctMatrix[0][0] = 0.000000;         //AA
57                 correctMatrix[1][0] = 11.619259;        //CA
58                 correctMatrix[2][0] = 11.694004;        //TA
59                 correctMatrix[3][0] = 7.748623;         //GA
60                 
61                 correctMatrix[1][1] = 0.000000;         //CC
62                 correctMatrix[2][1] = 7.619657;         //TC
63                 correctMatrix[3][1] = 12.852562;        //GC
64                 
65                 correctMatrix[2][2] = 0.000000;         //TT
66                 correctMatrix[3][2] = 10.964048;        //TG
67                 
68                 correctMatrix[3][3] = 0.000000;         //GG
69                 
70                 for(int i=0;i<4;i++){
71                         for(int j=0;j<i;j++){
72                                 correctMatrix[j][i] = correctMatrix[i][j];
73                         }
74                 }
75                 
76                 numSeqs = names.size();
77                                 
78                 if(processors == 1){ driver(0, numSeqs, distanceFileName); }
79                 else{
80                         
81                         for(int i=0;i<processors;i++){
82                                 start.push_back(int (sqrt(float(i)/float(processors)) * numSeqs));
83                                 end.push_back(int (sqrt(float(i+1)/float(processors)) * numSeqs));
84                         }
85                         
86                         createProcess(distanceFileName);
87                 }
88                 
89                 return 0;
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "correctDist", "execute");
93                 exit(1);
94         }
95 }
96 /**************************************************************************************************/
97 int correctDist::getSequences(string sequenceFileName){
98         try {
99                 ifstream sequenceFile;
100                 m->openInputFile(sequenceFileName, sequenceFile);
101                 string seqName, seqSeq;
102                 
103                 while(!sequenceFile.eof()){
104                         if (m->control_pressed) { break; }
105                         
106                         Sequence temp(sequenceFile); m->gobble(sequenceFile);
107                         
108                         if (temp.getName() != "") {
109                                 names.push_back(temp.getName());
110                                 sequences.push_back(fixSequence(temp.getAligned()));
111                         }
112                 }
113                 sequenceFile.close();
114                 return 0;
115         }
116         catch(exception& e) {
117                 m->errorOut(e, "correctDist", "getSequences");
118                 exit(1);
119         }       
120 }
121
122 /**************************************************************************************************/
123 vector<int> correctDist::fixSequence(string sequence){
124         try {
125                 int alignLength = sequence.length();
126                 
127                 vector<int> seqVector;
128                 
129                 for(int i=0;i<alignLength;i++){
130                         if(sequence[i] == 'A')          {       seqVector.push_back(0);         }
131                         else if(sequence[i] == 'C')     {       seqVector.push_back(1);         }
132                         else if(sequence[i] == 'T')     {       seqVector.push_back(2);         }
133                         else if(sequence[i] == 'G')     {       seqVector.push_back(3);         }
134                         else if(sequence[i] == 'N')     {       seqVector.push_back(0);         }//hmmmm....
135                 }
136                 
137                 return seqVector;
138         }
139         catch(exception& e) {
140                 m->errorOut(e, "correctDist", "fixSequence");
141                 exit(1);
142         }       
143 }
144
145 /**************************************************************************************************/
146
147 int correctDist::createProcess(string distanceFileName){
148         try {
149 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
150                 int process = 1;
151                 vector<int> processIDs;
152                 
153                 while(process != processors){
154                         
155                         int pid = fork();
156                         
157                         if(pid > 0){
158                                 processIDs.push_back(pid);
159                                 process++;
160                         }
161                         else if(pid == 0){
162                                 driver(start[process], end[process], distanceFileName + toString(getpid()) + ".temp");
163                                 exit(0);
164                         }
165                         else{
166                                 cout << "Doh!" << endl;
167                                 for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill(temp, SIGINT); }
168                                 exit(0);
169                         }
170                 }
171                 
172                 driver(start[0], end[0], distanceFileName);
173                 
174                 for (int i=0;i<processIDs.size();i++) { 
175                         int temp = processIDs[i];
176                         wait(&temp);
177                 }
178                 
179                 for(int i=0;i<processIDs.size();i++){
180                         m->appendFiles((distanceFileName + toString(processIDs[i]) + ".temp"), distanceFileName);
181                         remove((distanceFileName + toString(processIDs[i]) + ".temp").c_str());
182                 }
183 #endif
184                 return 0;
185         }
186         catch(exception& e) {
187                 m->errorOut(e, "correctDist", "createProcess");
188                 exit(1);
189         }       
190 }
191
192 /**************************************************************************************************/
193
194 int correctDist::driver(int start, int end, string distFileName){
195         try {
196                 ofstream distFile;
197                 m->openOutputFile(distFileName, distFile);
198                 distFile << setprecision(9);
199                 
200                 if(start == 0){
201                         distFile << numSeqs << endl;
202                 }
203                 
204                 int startTime = time(NULL);
205                 
206                 m->mothurOut("\nCalculating distances...\n");
207                 
208                 for(int i=start;i<end;i++){
209                         
210                         distFile << i;
211                         
212                         for(int j=0;j<i;j++){
213                                 
214                                 if (m->control_pressed) { distFile.close(); return 0; }
215                                 
216                                 double dist = getDist(sequences[i], sequences[j]);
217                                 
218                                 distFile << ' ' << dist;
219                         }
220                         distFile << endl;
221                         
222                         if(i % 100 == 0){ m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); }
223                 }
224                 distFile.close();
225                 
226                 if((end-1) % 100 != 0){ m->mothurOutJustToScreen(toString(end-1) + "\t" + toString(time(NULL) - startTime)+"\n"); }
227                 m->mothurOut("Done.\n");
228                 
229                 return 0;
230         }
231         catch(exception& e) {
232                 m->errorOut(e, "correctDist", "driver");
233                 exit(1);
234         }       
235 }
236 /**************************************************************************************************/
237 double correctDist::getDist(vector<int>& seqA, vector<int>& seqB){
238         try {
239                 
240                 int lengthA = seqA.size();
241                 int lengthB = seqB.size();
242                 
243                 vector<vector<double> > alignMatrix(lengthA+1);
244                 vector<vector<char> > alignMoves(lengthA+1);
245                 
246                 for(int i=0;i<=lengthA;i++){
247                         alignMatrix[i].resize(lengthB+1, 0);
248                         alignMoves[i].resize(lengthB+1, 'x');
249                 }
250                 
251                 for(int i=0;i<=lengthA;i++){
252                         alignMatrix[i][0] = 15.0 * i;
253                         alignMoves[i][0] = 'u';
254                 }
255                 for(int i=0;i<=lengthB;i++){
256                         alignMatrix[0][i] = 15.0 * i;
257                         alignMoves[0][i] = 'l';
258                 }
259                 
260                 for(int i=1;i<=lengthA;i++){
261                         for(int j=1;j<=lengthB;j++){
262                                 
263                                 if (m->control_pressed) {  return 0;  }
264                                 
265                                 double nogap;           
266                                 nogap = alignMatrix[i-1][j-1] + correctMatrix[seqA[i-1]][seqB[j-1]];
267                                 
268                                 
269                                 double gap;
270                                 double left;
271                                 if(i == lengthA){ //terminal gap
272                                         left = alignMatrix[i][j-1];
273                                 }
274                                 else{
275                                         if(seqB[j-1] == getLastMatch('l', alignMoves, i, j, seqA, seqB)){
276                                                 gap = 4.0;
277                                         }
278                                         else{
279                                                 gap = 15.0;
280                                         }
281                                         
282                                         left = alignMatrix[i][j-1] + gap;
283                                 }
284                                 
285                                 
286                                 double up;
287                                 if(j == lengthB){ //terminal gap
288                                         up = alignMatrix[i-1][j];
289                                 }
290                                 else{
291                                         
292                                         if(seqA[i-1] == getLastMatch('u', alignMoves, i, j, seqA, seqB)){
293                                                 gap = 4.0;
294                                         }
295                                         else{
296                                                 gap = 15.0;
297                                         }
298                                         
299                                         up = alignMatrix[i-1][j] + gap;
300                                 }
301                                 
302                                 
303                                 
304                                 if(nogap < left){
305                                         if(nogap < up){
306                                                 alignMoves[i][j] = 'd';
307                                                 alignMatrix[i][j] = nogap;
308                                         }
309                                         else{
310                                                 alignMoves[i][j] = 'u';
311                                                 alignMatrix[i][j] = up;
312                                         }
313                                 }
314                                 else{
315                                         if(left < up){
316                                                 alignMoves[i][j] = 'l';
317                                                 alignMatrix[i][j] = left;
318                                         }
319                                         else{
320                                                 alignMoves[i][j] = 'u';
321                                                 alignMatrix[i][j] = up;
322                                         }
323                                 }
324                         }
325                 }
326                 
327                 int i = lengthA;
328                 int j = lengthB;
329                 int count = 0;
330                 
331                 
332                 //      string alignA = "";
333                 //      string alignB = "";
334                 //      string bases = "ACTG";
335                 //      
336                 //      for(int i=0;i<lengthA;i++){
337                 //              cout << bases[seqA[i]];
338                 //      }cout << endl;
339                 //
340                 //      for(int i=0;i<lengthB;i++){
341                 //              cout << bases[seqB[i]];
342                 //      }cout << endl;
343                 
344                 while(i > 0 && j > 0){
345                         
346                         if (m->control_pressed) {  return 0;  }
347                         
348                         if(alignMoves[i][j] == 'd'){
349                                 //                      alignA = bases[seqA[i-1]] + alignA;
350                                 //                      alignB = bases[seqB[j-1]] + alignB;
351                                 
352                                 count++;
353                                 i--;
354                                 j--;
355                         }
356                         else if(alignMoves[i][j] == 'u'){
357                                 if(j != lengthB){
358                                         //                              alignA = bases[seqA[i-1]] + alignA;
359                                         //                              alignB = '-' + alignB;
360                                         count++;
361                                 }
362                                 
363                                 i--;
364                         }
365                         else if(alignMoves[i][j] == 'l'){
366                                 if(i != lengthA){
367                                         //                              alignA = '-' + alignA;
368                                         //                              alignB = bases[seqB[j-1]] + alignB;
369                                         count++;
370                                 }
371                                 
372                                 j--;
373                         }
374                 }
375                 
376                 //      cout << alignA << endl << alignB << endl;
377                 
378                 return alignMatrix[lengthA][lengthB] / (double)count;
379         }
380         catch(exception& e) {
381                 m->errorOut(e, "correctDist", "getDist");
382                 exit(1);
383         }       
384 }
385 /**************************************************************************************************/
386 int correctDist::getLastMatch(char direction, vector<vector<char> >& alignMoves, int i, int j, vector<int>& seqA, vector<int>& seqB){
387         try {
388                 
389                 char nullReturn = -1;
390                 
391                 while(i>=1 && j>=1){
392                         
393                         if (m->control_pressed) { return nullReturn; }
394                         
395                         if(direction == 'd'){
396                                 if(seqA[i-1] == seqB[j-1])      {       return seqA[i-1];       }
397                                 else                                            {       return nullReturn;      }
398                         }
399                         
400                         else if(direction == 'l')               {       j--;                            }
401                         else                                                    {       i--;                            }
402                         
403                         direction = alignMoves[i][j];
404                 }
405                 
406                 return nullReturn;
407         }
408         catch(exception& e) {
409                 m->errorOut(e, "correctDist", "getLastMatch");
410                 exit(1);
411         }       
412 }
413 /**************************************************************************************************/
414
415
416