5 * Created by Pat Schloss on 8/12/11.
6 * Copyright 2011 Patrick D. Schloss. All rights reserved.
10 #include "myseqdist.h"
11 #include "sequence.hpp"
13 /**************************************************************************************************/
14 correctDist::correctDist(int p) : processors(p) {
16 m = MothurOut::getInstance();
19 m->errorOut(e, "correctDist", "correctDist");
23 /**************************************************************************************************/
24 correctDist::correctDist(string sequenceFileName, int p) : processors(p) {
26 m = MothurOut::getInstance();
27 getSequences(sequenceFileName);
30 m->errorOut(e, "correctDist", "correctDist");
34 /**************************************************************************************************/
35 int correctDist::addSeq(string seqName, string seqSeq){
37 names.push_back(seqName);
38 sequences.push_back(fixSequence(seqSeq));
42 m->errorOut(e, "correctDist", "addSeq");
46 /**************************************************************************************************/
47 int correctDist::execute(string distanceFileName){
49 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
53 correctMatrix.resize(4);
54 for(int i=0;i<4;i++){ correctMatrix[i].resize(4); }
56 correctMatrix[0][0] = 0.000000; //AA
57 correctMatrix[1][0] = 11.619259; //CA
58 correctMatrix[2][0] = 11.694004; //TA
59 correctMatrix[3][0] = 7.748623; //GA
61 correctMatrix[1][1] = 0.000000; //CC
62 correctMatrix[2][1] = 7.619657; //TC
63 correctMatrix[3][1] = 12.852562; //GC
65 correctMatrix[2][2] = 0.000000; //TT
66 correctMatrix[3][2] = 10.964048; //TG
68 correctMatrix[3][3] = 0.000000; //GG
72 correctMatrix[j][i] = correctMatrix[i][j];
76 numSeqs = names.size();
78 if(processors == 1){ driver(0, numSeqs, distanceFileName); }
81 for(int i=0;i<processors;i++){
82 start.push_back(int (sqrt(float(i)/float(processors)) * numSeqs));
83 end.push_back(int (sqrt(float(i+1)/float(processors)) * numSeqs));
86 createProcess(distanceFileName);
92 m->errorOut(e, "correctDist", "execute");
96 /**************************************************************************************************/
97 int correctDist::getSequences(string sequenceFileName){
99 ifstream sequenceFile;
100 m->openInputFile(sequenceFileName, sequenceFile);
101 string seqName, seqSeq;
103 while(!sequenceFile.eof()){
104 if (m->control_pressed) { break; }
106 Sequence temp(sequenceFile); m->gobble(sequenceFile);
108 if (temp.getName() != "") {
109 names.push_back(temp.getName());
110 sequences.push_back(fixSequence(temp.getAligned()));
113 sequenceFile.close();
116 catch(exception& e) {
117 m->errorOut(e, "correctDist", "getSequences");
122 /**************************************************************************************************/
123 vector<int> correctDist::fixSequence(string sequence){
125 int alignLength = sequence.length();
127 vector<int> seqVector;
129 for(int i=0;i<alignLength;i++){
130 if(sequence[i] == 'A') { seqVector.push_back(0); }
131 else if(sequence[i] == 'C') { seqVector.push_back(1); }
132 else if(sequence[i] == 'T') { seqVector.push_back(2); }
133 else if(sequence[i] == 'G') { seqVector.push_back(3); }
134 else if(sequence[i] == 'N') { seqVector.push_back(0); }//hmmmm....
139 catch(exception& e) {
140 m->errorOut(e, "correctDist", "fixSequence");
145 /**************************************************************************************************/
147 int correctDist::createProcess(string distanceFileName){
149 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
151 vector<int> processIDs;
153 while(process != processors){
158 processIDs.push_back(pid);
162 driver(start[process], end[process], distanceFileName + toString(getpid()) + ".temp");
166 cout << "Doh!" << endl;
167 for (int i=0;i<processIDs.size();i++) { int temp = processIDs[i]; kill(temp, SIGINT); }
172 driver(start[0], end[0], distanceFileName);
174 for (int i=0;i<processIDs.size();i++) {
175 int temp = processIDs[i];
179 for(int i=0;i<processIDs.size();i++){
180 m->appendFiles((distanceFileName + toString(processIDs[i]) + ".temp"), distanceFileName);
181 remove((distanceFileName + toString(processIDs[i]) + ".temp").c_str());
186 catch(exception& e) {
187 m->errorOut(e, "correctDist", "createProcess");
192 /**************************************************************************************************/
194 int correctDist::driver(int start, int end, string distFileName){
197 m->openOutputFile(distFileName, distFile);
198 distFile << setprecision(9);
201 distFile << numSeqs << endl;
204 int startTime = time(NULL);
206 m->mothurOut("\nCalculating distances...\n");
208 for(int i=start;i<end;i++){
212 for(int j=0;j<i;j++){
214 if (m->control_pressed) { distFile.close(); return 0; }
216 double dist = getDist(sequences[i], sequences[j]);
218 distFile << ' ' << dist;
222 if(i % 100 == 0){ m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); }
226 if((end-1) % 100 != 0){ m->mothurOut(toString(end-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); }
227 m->mothurOut("Done.\n");
231 catch(exception& e) {
232 m->errorOut(e, "correctDist", "driver");
236 /**************************************************************************************************/
237 double correctDist::getDist(vector<int>& seqA, vector<int>& seqB){
240 int lengthA = seqA.size();
241 int lengthB = seqB.size();
243 vector<vector<double> > alignMatrix(lengthA+1);
244 vector<vector<char> > alignMoves(lengthA+1);
246 for(int i=0;i<=lengthA;i++){
247 alignMatrix[i].resize(lengthB+1, 0);
248 alignMoves[i].resize(lengthB+1, 'x');
251 for(int i=0;i<=lengthA;i++){
252 alignMatrix[i][0] = 15.0 * i;
253 alignMoves[i][0] = 'u';
255 for(int i=0;i<=lengthB;i++){
256 alignMatrix[0][i] = 15.0 * i;
257 alignMoves[0][i] = 'l';
260 for(int i=1;i<=lengthA;i++){
261 for(int j=1;j<=lengthB;j++){
263 if (m->control_pressed) { return 0; }
266 nogap = alignMatrix[i-1][j-1] + correctMatrix[seqA[i-1]][seqB[j-1]];
271 if(i == lengthA){ //terminal gap
272 left = alignMatrix[i][j-1];
275 if(seqB[j-1] == getLastMatch('l', alignMoves, i, j, seqA, seqB)){
282 left = alignMatrix[i][j-1] + gap;
287 if(j == lengthB){ //terminal gap
288 up = alignMatrix[i-1][j];
292 if(seqA[i-1] == getLastMatch('u', alignMoves, i, j, seqA, seqB)){
299 up = alignMatrix[i-1][j] + gap;
306 alignMoves[i][j] = 'd';
307 alignMatrix[i][j] = nogap;
310 alignMoves[i][j] = 'u';
311 alignMatrix[i][j] = up;
316 alignMoves[i][j] = 'l';
317 alignMatrix[i][j] = left;
320 alignMoves[i][j] = 'u';
321 alignMatrix[i][j] = up;
332 // string alignA = "";
333 // string alignB = "";
334 // string bases = "ACTG";
336 // for(int i=0;i<lengthA;i++){
337 // cout << bases[seqA[i]];
340 // for(int i=0;i<lengthB;i++){
341 // cout << bases[seqB[i]];
344 while(i > 0 && j > 0){
346 if (m->control_pressed) { return 0; }
348 if(alignMoves[i][j] == 'd'){
349 // alignA = bases[seqA[i-1]] + alignA;
350 // alignB = bases[seqB[j-1]] + alignB;
356 else if(alignMoves[i][j] == 'u'){
358 // alignA = bases[seqA[i-1]] + alignA;
359 // alignB = '-' + alignB;
365 else if(alignMoves[i][j] == 'l'){
367 // alignA = '-' + alignA;
368 // alignB = bases[seqB[j-1]] + alignB;
376 // cout << alignA << endl << alignB << endl;
378 return alignMatrix[lengthA][lengthB] / (double)count;
380 catch(exception& e) {
381 m->errorOut(e, "correctDist", "getDist");
385 /**************************************************************************************************/
386 int correctDist::getLastMatch(char direction, vector<vector<char> >& alignMoves, int i, int j, vector<int>& seqA, vector<int>& seqB){
389 char nullReturn = -1;
393 if (m->control_pressed) { return nullReturn; }
395 if(direction == 'd'){
396 if(seqA[i-1] == seqB[j-1]) { return seqA[i-1]; }
397 else { return nullReturn; }
400 else if(direction == 'l') { j--; }
403 direction = alignMoves[i][j];
408 catch(exception& e) {
409 m->errorOut(e, "correctDist", "getLastMatch");
413 /**************************************************************************************************/