]> git.donarmstrong.com Git - mothur.git/blobdiff - myseqdist.cpp
moved mothur's source into a folder to make grabbing just the source easier on github
[mothur.git] / myseqdist.cpp
diff --git a/myseqdist.cpp b/myseqdist.cpp
deleted file mode 100644 (file)
index 78255d8..0000000
+++ /dev/null
@@ -1,416 +0,0 @@
-/*
- *  pds.seqdist.cpp
- *  
- *
- *  Created by Pat Schloss on 8/12/11.
- *  Copyright 2011 Patrick D. Schloss. All rights reserved.
- *
- */
-
-#include "myseqdist.h"
-#include "sequence.hpp"
-
-/**************************************************************************************************/
-correctDist::correctDist(int p) : processors(p) {
-       try {
-               m = MothurOut::getInstance();
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "correctDist");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-correctDist::correctDist(string sequenceFileName, int p) : processors(p) {
-       try {
-               m = MothurOut::getInstance();
-               getSequences(sequenceFileName);
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "correctDist");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-int correctDist::addSeq(string seqName, string seqSeq){
-       try {
-               names.push_back(seqName);
-               sequences.push_back(fixSequence(seqSeq));
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "addSeq");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-int correctDist::execute(string distanceFileName){
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-               processors = 1;
-#endif
-               correctMatrix.resize(4);
-               for(int i=0;i<4;i++){   correctMatrix[i].resize(4);     }
-               
-               correctMatrix[0][0] = 0.000000;         //AA
-               correctMatrix[1][0] = 11.619259;        //CA
-               correctMatrix[2][0] = 11.694004;        //TA
-               correctMatrix[3][0] = 7.748623;         //GA
-               
-               correctMatrix[1][1] = 0.000000;         //CC
-               correctMatrix[2][1] = 7.619657;         //TC
-               correctMatrix[3][1] = 12.852562;        //GC
-               
-               correctMatrix[2][2] = 0.000000;         //TT
-               correctMatrix[3][2] = 10.964048;        //TG
-               
-               correctMatrix[3][3] = 0.000000;         //GG
-               
-               for(int i=0;i<4;i++){
-                       for(int j=0;j<i;j++){
-                               correctMatrix[j][i] = correctMatrix[i][j];
-                       }
-               }
-               
-               numSeqs = names.size();
-                               
-               if(processors == 1){ driver(0, numSeqs, distanceFileName); }
-               else{
-                       
-                       for(int i=0;i<processors;i++){
-                               start.push_back(int (sqrt(float(i)/float(processors)) * numSeqs));
-                               end.push_back(int (sqrt(float(i+1)/float(processors)) * numSeqs));
-                       }
-                       
-                       createProcess(distanceFileName);
-               }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "execute");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-int correctDist::getSequences(string sequenceFileName){
-       try {
-               ifstream sequenceFile;
-               m->openInputFile(sequenceFileName, sequenceFile);
-               string seqName, seqSeq;
-               
-               while(!sequenceFile.eof()){
-                       if (m->control_pressed) { break; }
-                       
-                       Sequence temp(sequenceFile); m->gobble(sequenceFile);
-                       
-                       if (temp.getName() != "") {
-                               names.push_back(temp.getName());
-                               sequences.push_back(fixSequence(temp.getAligned()));
-                       }
-               }
-               sequenceFile.close();
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "getSequences");
-               exit(1);
-       }       
-}
-
-/**************************************************************************************************/
-vector<int> correctDist::fixSequence(string sequence){
-       try {
-               int alignLength = sequence.length();
-               
-               vector<int> seqVector;
-               
-               for(int i=0;i<alignLength;i++){
-                       if(sequence[i] == 'A')          {       seqVector.push_back(0);         }
-                       else if(sequence[i] == 'C')     {       seqVector.push_back(1);         }
-                       else if(sequence[i] == 'T')     {       seqVector.push_back(2);         }
-                       else if(sequence[i] == 'G')     {       seqVector.push_back(3);         }
-                       else if(sequence[i] == 'N')     {       seqVector.push_back(0);         }//hmmmm....
-               }
-               
-               return seqVector;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "fixSequence");
-               exit(1);
-       }       
-}
-
-/**************************************************************************************************/
-
-int correctDist::createProcess(string distanceFileName){
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-               int process = 1;
-               vector<int> processIDs;
-               
-               while(process != processors){
-                       
-                       int pid = fork();
-                       
-                       if(pid > 0){
-                               processIDs.push_back(pid);
-                               process++;
-                       }
-                       else if(pid == 0){
-                               driver(start[process], end[process], distanceFileName + toString(getpid()) + ".temp");
-                               exit(0);
-                       }
-                       else{
-                               cout << "Doh!" << endl;
-                               for (int i=0;i<processIDs.size();i++) {  int temp = processIDs[i]; kill(temp, SIGINT); }
-                               exit(0);
-                       }
-               }
-               
-               driver(start[0], end[0], distanceFileName);
-               
-               for (int i=0;i<processIDs.size();i++) { 
-                       int temp = processIDs[i];
-                       wait(&temp);
-               }
-               
-               for(int i=0;i<processIDs.size();i++){
-                       m->appendFiles((distanceFileName + toString(processIDs[i]) + ".temp"), distanceFileName);
-                       remove((distanceFileName + toString(processIDs[i]) + ".temp").c_str());
-               }
-#endif
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "createProcess");
-               exit(1);
-       }       
-}
-
-/**************************************************************************************************/
-
-int correctDist::driver(int start, int end, string distFileName){
-       try {
-               ofstream distFile;
-               m->openOutputFile(distFileName, distFile);
-               distFile << setprecision(9);
-               
-               if(start == 0){
-                       distFile << numSeqs << endl;
-               }
-               
-               int startTime = time(NULL);
-               
-               m->mothurOut("\nCalculating distances...\n");
-               
-               for(int i=start;i<end;i++){
-                       
-                       distFile << i;
-                       
-                       for(int j=0;j<i;j++){
-                               
-                               if (m->control_pressed) { distFile.close(); return 0; }
-                               
-                               double dist = getDist(sequences[i], sequences[j]);
-                               
-                               distFile << ' ' << dist;
-                       }
-                       distFile << endl;
-                       
-                       if(i % 100 == 0){ m->mothurOut(toString(i) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); }
-               }
-               distFile.close();
-               
-               if((end-1) % 100 != 0){ m->mothurOut(toString(end-1) + "\t" + toString(time(NULL) - startTime)); m->mothurOutEndLine(); }
-               m->mothurOut("Done.\n");
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "driver");
-               exit(1);
-       }       
-}
-/**************************************************************************************************/
-double correctDist::getDist(vector<int>& seqA, vector<int>& seqB){
-       try {
-               
-               int lengthA = seqA.size();
-               int lengthB = seqB.size();
-               
-               vector<vector<double> > alignMatrix(lengthA+1);
-               vector<vector<char> > alignMoves(lengthA+1);
-               
-               for(int i=0;i<=lengthA;i++){
-                       alignMatrix[i].resize(lengthB+1, 0);
-                       alignMoves[i].resize(lengthB+1, 'x');
-               }
-               
-               for(int i=0;i<=lengthA;i++){
-                       alignMatrix[i][0] = 15.0 * i;
-                       alignMoves[i][0] = 'u';
-               }
-               for(int i=0;i<=lengthB;i++){
-                       alignMatrix[0][i] = 15.0 * i;
-                       alignMoves[0][i] = 'l';
-               }
-               
-               for(int i=1;i<=lengthA;i++){
-                       for(int j=1;j<=lengthB;j++){
-                               
-                               if (m->control_pressed) {  return 0;  }
-                               
-                               double nogap;           
-                               nogap = alignMatrix[i-1][j-1] + correctMatrix[seqA[i-1]][seqB[j-1]];
-                               
-                               
-                               double gap;
-                               double left;
-                               if(i == lengthA){ //terminal gap
-                                       left = alignMatrix[i][j-1];
-                               }
-                               else{
-                                       if(seqB[j-1] == getLastMatch('l', alignMoves, i, j, seqA, seqB)){
-                                               gap = 4.0;
-                                       }
-                                       else{
-                                               gap = 15.0;
-                                       }
-                                       
-                                       left = alignMatrix[i][j-1] + gap;
-                               }
-                               
-                               
-                               double up;
-                               if(j == lengthB){ //terminal gap
-                                       up = alignMatrix[i-1][j];
-                               }
-                               else{
-                                       
-                                       if(seqA[i-1] == getLastMatch('u', alignMoves, i, j, seqA, seqB)){
-                                               gap = 4.0;
-                                       }
-                                       else{
-                                               gap = 15.0;
-                                       }
-                                       
-                                       up = alignMatrix[i-1][j] + gap;
-                               }
-                               
-                               
-                               
-                               if(nogap < left){
-                                       if(nogap < up){
-                                               alignMoves[i][j] = 'd';
-                                               alignMatrix[i][j] = nogap;
-                                       }
-                                       else{
-                                               alignMoves[i][j] = 'u';
-                                               alignMatrix[i][j] = up;
-                                       }
-                               }
-                               else{
-                                       if(left < up){
-                                               alignMoves[i][j] = 'l';
-                                               alignMatrix[i][j] = left;
-                                       }
-                                       else{
-                                               alignMoves[i][j] = 'u';
-                                               alignMatrix[i][j] = up;
-                                       }
-                               }
-                       }
-               }
-               
-               int i = lengthA;
-               int j = lengthB;
-               int count = 0;
-               
-               
-               //      string alignA = "";
-               //      string alignB = "";
-               //      string bases = "ACTG";
-               //      
-               //      for(int i=0;i<lengthA;i++){
-               //              cout << bases[seqA[i]];
-               //      }cout << endl;
-               //
-               //      for(int i=0;i<lengthB;i++){
-               //              cout << bases[seqB[i]];
-               //      }cout << endl;
-               
-               while(i > 0 && j > 0){
-                       
-                       if (m->control_pressed) {  return 0;  }
-                       
-                       if(alignMoves[i][j] == 'd'){
-                               //                      alignA = bases[seqA[i-1]] + alignA;
-                               //                      alignB = bases[seqB[j-1]] + alignB;
-                               
-                               count++;
-                               i--;
-                               j--;
-                       }
-                       else if(alignMoves[i][j] == 'u'){
-                               if(j != lengthB){
-                                       //                              alignA = bases[seqA[i-1]] + alignA;
-                                       //                              alignB = '-' + alignB;
-                                       count++;
-                               }
-                               
-                               i--;
-                       }
-                       else if(alignMoves[i][j] == 'l'){
-                               if(i != lengthA){
-                                       //                              alignA = '-' + alignA;
-                                       //                              alignB = bases[seqB[j-1]] + alignB;
-                                       count++;
-                               }
-                               
-                               j--;
-                       }
-               }
-               
-               //      cout << alignA << endl << alignB << endl;
-               
-               return alignMatrix[lengthA][lengthB] / (double)count;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "getDist");
-               exit(1);
-       }       
-}
-/**************************************************************************************************/
-int correctDist::getLastMatch(char direction, vector<vector<char> >& alignMoves, int i, int j, vector<int>& seqA, vector<int>& seqB){
-       try {
-               
-               char nullReturn = -1;
-               
-               while(i>=1 && j>=1){
-                       
-                       if (m->control_pressed) { return nullReturn; }
-                       
-                       if(direction == 'd'){
-                               if(seqA[i-1] == seqB[j-1])      {       return seqA[i-1];       }
-                               else                                            {       return nullReturn;      }
-                       }
-                       
-                       else if(direction == 'l')               {       j--;                            }
-                       else                                                    {       i--;                            }
-                       
-                       direction = alignMoves[i][j];
-               }
-               
-               return nullReturn;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "correctDist", "getLastMatch");
-               exit(1);
-       }       
-}
-/**************************************************************************************************/
-
-
-