X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=myseqdist.cpp;fp=myseqdist.cpp;h=78255d8f73d894c7cb4032d925b684843094bee7;hb=0caf3fbabaa3ece404f8ce77f4c883dc5b1bf1dc;hp=0000000000000000000000000000000000000000;hpb=1b73ff67c83892a025e597dabd9df6fe7b58206a;p=mothur.git diff --git a/myseqdist.cpp b/myseqdist.cpp new file mode 100644 index 0000000..78255d8 --- /dev/null +++ b/myseqdist.cpp @@ -0,0 +1,416 @@ +/* + * pds.seqdist.cpp + * + * + * Created by Pat Schloss on 8/12/11. + * Copyright 2011 Patrick D. Schloss. All rights reserved. + * + */ + +#include "myseqdist.h" +#include "sequence.hpp" + +/**************************************************************************************************/ +correctDist::correctDist(int p) : processors(p) { + try { + m = MothurOut::getInstance(); + } + catch(exception& e) { + m->errorOut(e, "correctDist", "correctDist"); + exit(1); + } +} +/**************************************************************************************************/ +correctDist::correctDist(string sequenceFileName, int p) : processors(p) { + try { + m = MothurOut::getInstance(); + getSequences(sequenceFileName); + } + catch(exception& e) { + m->errorOut(e, "correctDist", "correctDist"); + exit(1); + } +} +/**************************************************************************************************/ +int correctDist::addSeq(string seqName, string seqSeq){ + try { + names.push_back(seqName); + sequences.push_back(fixSequence(seqSeq)); + return 0; + } + catch(exception& e) { + m->errorOut(e, "correctDist", "addSeq"); + exit(1); + } +} +/**************************************************************************************************/ +int correctDist::execute(string distanceFileName){ + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else + processors = 1; +#endif + correctMatrix.resize(4); + for(int i=0;i<4;i++){ correctMatrix[i].resize(4); } + + correctMatrix[0][0] = 0.000000; //AA + correctMatrix[1][0] = 11.619259; //CA + correctMatrix[2][0] = 11.694004; //TA + correctMatrix[3][0] = 7.748623; //GA + + correctMatrix[1][1] = 0.000000; //CC + correctMatrix[2][1] = 7.619657; //TC + correctMatrix[3][1] = 12.852562; //GC + + correctMatrix[2][2] = 0.000000; //TT + correctMatrix[3][2] = 10.964048; //TG + + correctMatrix[3][3] = 0.000000; //GG + + for(int i=0;i<4;i++){ + for(int j=0;jerrorOut(e, "correctDist", "execute"); + exit(1); + } +} +/**************************************************************************************************/ +int correctDist::getSequences(string sequenceFileName){ + try { + ifstream sequenceFile; + m->openInputFile(sequenceFileName, sequenceFile); + string seqName, seqSeq; + + while(!sequenceFile.eof()){ + if (m->control_pressed) { break; } + + Sequence temp(sequenceFile); m->gobble(sequenceFile); + + if (temp.getName() != "") { + names.push_back(temp.getName()); + sequences.push_back(fixSequence(temp.getAligned())); + } + } + sequenceFile.close(); + return 0; + } + catch(exception& e) { + m->errorOut(e, "correctDist", "getSequences"); + exit(1); + } +} + +/**************************************************************************************************/ +vector correctDist::fixSequence(string sequence){ + try { + int alignLength = sequence.length(); + + vector seqVector; + + for(int i=0;ierrorOut(e, "correctDist", "fixSequence"); + exit(1); + } +} + +/**************************************************************************************************/ + +int correctDist::createProcess(string distanceFileName){ + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + int process = 1; + vector processIDs; + + while(process != processors){ + + int pid = fork(); + + if(pid > 0){ + processIDs.push_back(pid); + process++; + } + else if(pid == 0){ + driver(start[process], end[process], distanceFileName + toString(getpid()) + ".temp"); + exit(0); + } + else{ + cout << "Doh!" << endl; + for (int i=0;iappendFiles((distanceFileName + toString(processIDs[i]) + ".temp"), distanceFileName); + remove((distanceFileName + toString(processIDs[i]) + ".temp").c_str()); + } +#endif + return 0; + } + catch(exception& e) { + m->errorOut(e, "correctDist", "createProcess"); + exit(1); + } +} + +/**************************************************************************************************/ + +int correctDist::driver(int start, int end, string distFileName){ + try { + ofstream distFile; + m->openOutputFile(distFileName, distFile); + distFile << setprecision(9); + + if(start == 0){ + distFile << numSeqs << endl; + } + + int startTime = time(NULL); + + m->mothurOut("\nCalculating distances...\n"); + + for(int i=start;icontrol_pressed) { distFile.close(); return 0; } + + double dist = getDist(sequences[i], sequences[j]); + + distFile << ' ' << dist; + } + distFile << endl; + + if(i % 100 == 0){ m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); } + } + distFile.close(); + + if((end-1) % 100 != 0){ m->mothurOut(toString(end-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); } + m->mothurOut("Done.\n"); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "correctDist", "driver"); + exit(1); + } +} +/**************************************************************************************************/ +double correctDist::getDist(vector& seqA, vector& seqB){ + try { + + int lengthA = seqA.size(); + int lengthB = seqB.size(); + + vector > alignMatrix(lengthA+1); + vector > alignMoves(lengthA+1); + + for(int i=0;i<=lengthA;i++){ + alignMatrix[i].resize(lengthB+1, 0); + alignMoves[i].resize(lengthB+1, 'x'); + } + + for(int i=0;i<=lengthA;i++){ + alignMatrix[i][0] = 15.0 * i; + alignMoves[i][0] = 'u'; + } + for(int i=0;i<=lengthB;i++){ + alignMatrix[0][i] = 15.0 * i; + alignMoves[0][i] = 'l'; + } + + for(int i=1;i<=lengthA;i++){ + for(int j=1;j<=lengthB;j++){ + + if (m->control_pressed) { return 0; } + + double nogap; + nogap = alignMatrix[i-1][j-1] + correctMatrix[seqA[i-1]][seqB[j-1]]; + + + double gap; + double left; + if(i == lengthA){ //terminal gap + left = alignMatrix[i][j-1]; + } + else{ + if(seqB[j-1] == getLastMatch('l', alignMoves, i, j, seqA, seqB)){ + gap = 4.0; + } + else{ + gap = 15.0; + } + + left = alignMatrix[i][j-1] + gap; + } + + + double up; + if(j == lengthB){ //terminal gap + up = alignMatrix[i-1][j]; + } + else{ + + if(seqA[i-1] == getLastMatch('u', alignMoves, i, j, seqA, seqB)){ + gap = 4.0; + } + else{ + gap = 15.0; + } + + up = alignMatrix[i-1][j] + gap; + } + + + + if(nogap < left){ + if(nogap < up){ + alignMoves[i][j] = 'd'; + alignMatrix[i][j] = nogap; + } + else{ + alignMoves[i][j] = 'u'; + alignMatrix[i][j] = up; + } + } + else{ + if(left < up){ + alignMoves[i][j] = 'l'; + alignMatrix[i][j] = left; + } + else{ + alignMoves[i][j] = 'u'; + alignMatrix[i][j] = up; + } + } + } + } + + int i = lengthA; + int j = lengthB; + int count = 0; + + + // string alignA = ""; + // string alignB = ""; + // string bases = "ACTG"; + // + // for(int i=0;i 0 && j > 0){ + + if (m->control_pressed) { return 0; } + + if(alignMoves[i][j] == 'd'){ + // alignA = bases[seqA[i-1]] + alignA; + // alignB = bases[seqB[j-1]] + alignB; + + count++; + i--; + j--; + } + else if(alignMoves[i][j] == 'u'){ + if(j != lengthB){ + // alignA = bases[seqA[i-1]] + alignA; + // alignB = '-' + alignB; + count++; + } + + i--; + } + else if(alignMoves[i][j] == 'l'){ + if(i != lengthA){ + // alignA = '-' + alignA; + // alignB = bases[seqB[j-1]] + alignB; + count++; + } + + j--; + } + } + + // cout << alignA << endl << alignB << endl; + + return alignMatrix[lengthA][lengthB] / (double)count; + } + catch(exception& e) { + m->errorOut(e, "correctDist", "getDist"); + exit(1); + } +} +/**************************************************************************************************/ +int correctDist::getLastMatch(char direction, vector >& alignMoves, int i, int j, vector& seqA, vector& seqB){ + try { + + char nullReturn = -1; + + while(i>=1 && j>=1){ + + if (m->control_pressed) { return nullReturn; } + + if(direction == 'd'){ + if(seqA[i-1] == seqB[j-1]) { return seqA[i-1]; } + else { return nullReturn; } + } + + else if(direction == 'l') { j--; } + else { i--; } + + direction = alignMoves[i][j]; + } + + return nullReturn; + } + catch(exception& e) { + m->errorOut(e, "correctDist", "getLastMatch"); + exit(1); + } +} +/**************************************************************************************************/ + + +