+++ /dev/null
-/*
- * pds.seqdist.cpp
- *
- *
- * Created by Pat Schloss on 8/12/11.
- * Copyright 2011 Patrick D. Schloss. All rights reserved.
- *
- */
-
-#include "myseqdist.h"
-#include "sequence.hpp"
-
-/**************************************************************************************************/
-correctDist::correctDist(int p) : processors(p) {
- try {
- m = MothurOut::getInstance();
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "correctDist");
- exit(1);
- }
-}
-/**************************************************************************************************/
-correctDist::correctDist(string sequenceFileName, int p) : processors(p) {
- try {
- m = MothurOut::getInstance();
- getSequences(sequenceFileName);
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "correctDist");
- exit(1);
- }
-}
-/**************************************************************************************************/
-int correctDist::addSeq(string seqName, string seqSeq){
- try {
- names.push_back(seqName);
- sequences.push_back(fixSequence(seqSeq));
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "addSeq");
- exit(1);
- }
-}
-/**************************************************************************************************/
-int correctDist::execute(string distanceFileName){
- try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
- processors = 1;
-#endif
- correctMatrix.resize(4);
- for(int i=0;i<4;i++){ correctMatrix[i].resize(4); }
-
- correctMatrix[0][0] = 0.000000; //AA
- correctMatrix[1][0] = 11.619259; //CA
- correctMatrix[2][0] = 11.694004; //TA
- correctMatrix[3][0] = 7.748623; //GA
-
- correctMatrix[1][1] = 0.000000; //CC
- correctMatrix[2][1] = 7.619657; //TC
- correctMatrix[3][1] = 12.852562; //GC
-
- correctMatrix[2][2] = 0.000000; //TT
- correctMatrix[3][2] = 10.964048; //TG
-
- correctMatrix[3][3] = 0.000000; //GG
-
- for(int i=0;i<4;i++){
- for(int j=0;j<i;j++){
- correctMatrix[j][i] = correctMatrix[i][j];
- }
- }
-
- numSeqs = names.size();
-
- if(processors == 1){ driver(0, numSeqs, distanceFileName); }
- else{
-
- for(int i=0;i<processors;i++){
- start.push_back(int (sqrt(float(i)/float(processors)) * numSeqs));
- end.push_back(int (sqrt(float(i+1)/float(processors)) * numSeqs));
- }
-
- createProcess(distanceFileName);
- }
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "execute");
- exit(1);
- }
-}
-/**************************************************************************************************/
-int correctDist::getSequences(string sequenceFileName){
- try {
- ifstream sequenceFile;
- m->openInputFile(sequenceFileName, sequenceFile);
- string seqName, seqSeq;
-
- while(!sequenceFile.eof()){
- if (m->control_pressed) { break; }
-
- Sequence temp(sequenceFile); m->gobble(sequenceFile);
-
- if (temp.getName() != "") {
- names.push_back(temp.getName());
- sequences.push_back(fixSequence(temp.getAligned()));
- }
- }
- sequenceFile.close();
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "getSequences");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-vector<int> correctDist::fixSequence(string sequence){
- try {
- int alignLength = sequence.length();
-
- vector<int> seqVector;
-
- for(int i=0;i<alignLength;i++){
- if(sequence[i] == 'A') { seqVector.push_back(0); }
- else if(sequence[i] == 'C') { seqVector.push_back(1); }
- else if(sequence[i] == 'T') { seqVector.push_back(2); }
- else if(sequence[i] == 'G') { seqVector.push_back(3); }
- else if(sequence[i] == 'N') { seqVector.push_back(0); }//hmmmm....
- }
-
- return seqVector;
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "fixSequence");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
-int correctDist::createProcess(string distanceFileName){
- try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- int process = 1;
- vector<int> processIDs;
-
- while(process != processors){
-
- int pid = fork();
-
- if(pid > 0){
- processIDs.push_back(pid);
- process++;
- }
- else if(pid == 0){
- driver(start[process], end[process], distanceFileName + toString(getpid()) + ".temp");
- exit(0);
- }
- else{
- cout << "Doh!" << endl;
- for (int i=0;i<processIDs.size();i++) { int temp = processIDs[i]; kill(temp, SIGINT); }
- exit(0);
- }
- }
-
- driver(start[0], end[0], distanceFileName);
-
- for (int i=0;i<processIDs.size();i++) {
- int temp = processIDs[i];
- wait(&temp);
- }
-
- for(int i=0;i<processIDs.size();i++){
- m->appendFiles((distanceFileName + toString(processIDs[i]) + ".temp"), distanceFileName);
- remove((distanceFileName + toString(processIDs[i]) + ".temp").c_str());
- }
-#endif
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "createProcess");
- exit(1);
- }
-}
-
-/**************************************************************************************************/
-
-int correctDist::driver(int start, int end, string distFileName){
- try {
- ofstream distFile;
- m->openOutputFile(distFileName, distFile);
- distFile << setprecision(9);
-
- if(start == 0){
- distFile << numSeqs << endl;
- }
-
- int startTime = time(NULL);
-
- m->mothurOut("\nCalculating distances...\n");
-
- for(int i=start;i<end;i++){
-
- distFile << i;
-
- for(int j=0;j<i;j++){
-
- if (m->control_pressed) { distFile.close(); return 0; }
-
- double dist = getDist(sequences[i], sequences[j]);
-
- distFile << ' ' << dist;
- }
- distFile << endl;
-
- if(i % 100 == 0){ m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); }
- }
- distFile.close();
-
- if((end-1) % 100 != 0){ m->mothurOut(toString(end-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); }
- m->mothurOut("Done.\n");
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "driver");
- exit(1);
- }
-}
-/**************************************************************************************************/
-double correctDist::getDist(vector<int>& seqA, vector<int>& seqB){
- try {
-
- int lengthA = seqA.size();
- int lengthB = seqB.size();
-
- vector<vector<double> > alignMatrix(lengthA+1);
- vector<vector<char> > alignMoves(lengthA+1);
-
- for(int i=0;i<=lengthA;i++){
- alignMatrix[i].resize(lengthB+1, 0);
- alignMoves[i].resize(lengthB+1, 'x');
- }
-
- for(int i=0;i<=lengthA;i++){
- alignMatrix[i][0] = 15.0 * i;
- alignMoves[i][0] = 'u';
- }
- for(int i=0;i<=lengthB;i++){
- alignMatrix[0][i] = 15.0 * i;
- alignMoves[0][i] = 'l';
- }
-
- for(int i=1;i<=lengthA;i++){
- for(int j=1;j<=lengthB;j++){
-
- if (m->control_pressed) { return 0; }
-
- double nogap;
- nogap = alignMatrix[i-1][j-1] + correctMatrix[seqA[i-1]][seqB[j-1]];
-
-
- double gap;
- double left;
- if(i == lengthA){ //terminal gap
- left = alignMatrix[i][j-1];
- }
- else{
- if(seqB[j-1] == getLastMatch('l', alignMoves, i, j, seqA, seqB)){
- gap = 4.0;
- }
- else{
- gap = 15.0;
- }
-
- left = alignMatrix[i][j-1] + gap;
- }
-
-
- double up;
- if(j == lengthB){ //terminal gap
- up = alignMatrix[i-1][j];
- }
- else{
-
- if(seqA[i-1] == getLastMatch('u', alignMoves, i, j, seqA, seqB)){
- gap = 4.0;
- }
- else{
- gap = 15.0;
- }
-
- up = alignMatrix[i-1][j] + gap;
- }
-
-
-
- if(nogap < left){
- if(nogap < up){
- alignMoves[i][j] = 'd';
- alignMatrix[i][j] = nogap;
- }
- else{
- alignMoves[i][j] = 'u';
- alignMatrix[i][j] = up;
- }
- }
- else{
- if(left < up){
- alignMoves[i][j] = 'l';
- alignMatrix[i][j] = left;
- }
- else{
- alignMoves[i][j] = 'u';
- alignMatrix[i][j] = up;
- }
- }
- }
- }
-
- int i = lengthA;
- int j = lengthB;
- int count = 0;
-
-
- // string alignA = "";
- // string alignB = "";
- // string bases = "ACTG";
- //
- // for(int i=0;i<lengthA;i++){
- // cout << bases[seqA[i]];
- // }cout << endl;
- //
- // for(int i=0;i<lengthB;i++){
- // cout << bases[seqB[i]];
- // }cout << endl;
-
- while(i > 0 && j > 0){
-
- if (m->control_pressed) { return 0; }
-
- if(alignMoves[i][j] == 'd'){
- // alignA = bases[seqA[i-1]] + alignA;
- // alignB = bases[seqB[j-1]] + alignB;
-
- count++;
- i--;
- j--;
- }
- else if(alignMoves[i][j] == 'u'){
- if(j != lengthB){
- // alignA = bases[seqA[i-1]] + alignA;
- // alignB = '-' + alignB;
- count++;
- }
-
- i--;
- }
- else if(alignMoves[i][j] == 'l'){
- if(i != lengthA){
- // alignA = '-' + alignA;
- // alignB = bases[seqB[j-1]] + alignB;
- count++;
- }
-
- j--;
- }
- }
-
- // cout << alignA << endl << alignB << endl;
-
- return alignMatrix[lengthA][lengthB] / (double)count;
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "getDist");
- exit(1);
- }
-}
-/**************************************************************************************************/
-int correctDist::getLastMatch(char direction, vector<vector<char> >& alignMoves, int i, int j, vector<int>& seqA, vector<int>& seqB){
- try {
-
- char nullReturn = -1;
-
- while(i>=1 && j>=1){
-
- if (m->control_pressed) { return nullReturn; }
-
- if(direction == 'd'){
- if(seqA[i-1] == seqB[j-1]) { return seqA[i-1]; }
- else { return nullReturn; }
- }
-
- else if(direction == 'l') { j--; }
- else { i--; }
-
- direction = alignMoves[i][j];
- }
-
- return nullReturn;
- }
- catch(exception& e) {
- m->errorOut(e, "correctDist", "getLastMatch");
- exit(1);
- }
-}
-/**************************************************************************************************/
-
-
-