]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed bugs in venn and aligner
authorwestcott <westcott>
Mon, 20 Jul 2009 19:33:56 +0000 (19:33 +0000)
committerwestcott <westcott>
Mon, 20 Jul 2009 19:33:56 +0000 (19:33 +0000)
21 files changed:
aligncommand.cpp
alignment.cpp
alignment.hpp
bellerophon.h
blastalign.hpp
chao1.cpp
chimera.h
chimeraseqscommand.cpp
chimeraseqscommand.h
database.cpp
database.hpp
engine.cpp
gotohoverlap.cpp
gotohoverlap.hpp
kmerdb.cpp
nast.cpp
needlemanoverlap.cpp
pintail.cpp
pintail.h
sharedchao1.cpp
venn.cpp

index 2fda2529176bb263070c055965e74eaac732faf4..d672fc3df2a405a1057781dacd84c4a9f2b50d6f 100644 (file)
@@ -160,14 +160,16 @@ int AlignCommand::execute(){
                        templateDB = new KmerDB(templateFileName, kmerSize);
                }
                
-               if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 3000);        }
-               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 3000);                       }
+               int longestBase = templateDB->getLongestBase();
+               
+               if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
                else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
                else if(align == "noalign")             {       alignment = new NoAlign();                                                                                                      }
                else {
                        mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
                        mothurOutEndLine();
-                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 3000);
+                       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
                }
                
                string alignFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align";
@@ -184,7 +186,7 @@ int AlignCommand::execute(){
                        inFASTA.close();
                        
                        lines.push_back(new linePair(0, numFastaSeqs));
-                       
+               
                        driver(lines[0], alignFileName, reportFileName);
                        
                }
@@ -262,13 +264,14 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
                
                ifstream inFASTA;
                openInputFile(candidateFileName, inFASTA);
+
                inFASTA.seekg(line->start);
-               
+
                for(int i=0;i<line->numSeqs;i++){
                        
                        Sequence* candidateSeq = new Sequence(inFASTA);
                        report.setCandidate(candidateSeq);
-                       
+       
                        Sequence temp = templateDB->findClosestSequence(candidateSeq);
                        Sequence* templateSeq = &temp;
                        
@@ -276,7 +279,9 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
                        report.setSearchParameters(search, templateDB->getSearchScore());
                        
                        Nast nast(alignment, candidateSeq, templateSeq);
+
                        report.setAlignmentParameters(align, alignment);
+
                        report.setNastParameters(nast);
                        
                        alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
index 6014506f5e5f3065b13328b4205c972d4b78334a..25b27604533cbefa4ba1748001a45622e9712547 100644 (file)
@@ -20,72 +20,81 @@ Alignment::Alignment() {    /*      do nothing      */      }
 /**************************************************************************************************/
 
 Alignment::Alignment(int A) : nCols(A), nRows(A) {
-       
-       alignment.resize(nRows);                        //      For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
-       for(int i=0;i<nRows;i++){                       //      matrix by initializing a matrix that is A x A.  By default we will set A
-               alignment[i].resize(nCols);             //      at 2000 for 16S rRNA gene sequences
-       }       
-       
+       try {
+               alignment.resize(nRows);                        //      For the Gotoh and Needleman-Wunsch we initialize the dynamic programming
+               for(int i=0;i<nRows;i++){                       //      matrix by initializing a matrix that is A x A.  By default we will set A
+                       alignment[i].resize(nCols);             //      at 2000 for 16S rRNA gene sequences
+               }       
+       }
+       catch(exception& e) {
+               errorOut(e, "Alignment", "Alignment");
+               exit(1);
+       }
 }
 
 /**************************************************************************************************/
 
 void Alignment::traceBack(){                   //      This traceback routine is used by the dynamic programming algorithms
-                                                                               //      to fill the values of seqAaln and seqBaln
-       seqAaln = "";
-       seqBaln = "";
-       int row = lB-1;
-       int column = lA-1;
-//     seqAstart = 1;
-//     seqAend = column;
-       
-       AlignmentCell currentCell = alignment[row][column];     //      Start the traceback from the bottom-right corner of the
-                                                                                                               //      matrix
-
-       if(currentCell.prevCell == 'x'){        seqAaln = seqBaln = "NOALIGNMENT";              }//If there's an 'x' in the bottom-
-       else{                                                                                           //      right corner bail out because it means nothing got aligned
-               while(currentCell.prevCell != 'x'){                             //      while the previous cell isn't an 'x', keep going...
-                       
-                       if(currentCell.prevCell == 'u'){                        //      if the pointer to the previous cell is 'u', go up in the
-                               seqAaln = '-' + seqAaln;                                //      matrix.  this indicates that we need to insert a gap in
-                               seqBaln = seqB[row] + seqBaln;                  //      seqA and a base in seqB
-                               currentCell = alignment[--row][column];
-                       }
-                       else if(currentCell.prevCell == 'l'){           //      if the pointer to the previous cell is 'l', go to the left
-                               seqBaln = '-' + seqBaln;                                //      in the matrix.  this indicates that we need to insert a gap
-                               seqAaln = seqA[column] + seqAaln;               //      in seqB and a base in seqA
-                               currentCell = alignment[row][--column];
-                       }
-                       else{
-                               seqAaln = seqA[column] + seqAaln;               //      otherwise we need to go diagonally up and to the left,
-                               seqBaln = seqB[row] + seqBaln;                  //      here we add a base to both alignments
-                               currentCell = alignment[--row][--column];
+       try {   
+                                                                       //      to fill the values of seqAaln and seqBaln
+               seqAaln = "";
+               seqBaln = "";
+               int row = lB-1;
+               int column = lA-1;
+               //      seqAstart = 1;
+               //      seqAend = column;
+               
+               AlignmentCell currentCell = alignment[row][column];     //      Start the traceback from the bottom-right corner of the
+               //      matrix
+               
+               if(currentCell.prevCell == 'x'){        seqAaln = seqBaln = "NOALIGNMENT";              }//If there's an 'x' in the bottom-
+               else{                                                                                           //      right corner bail out because it means nothing got aligned
+                       while(currentCell.prevCell != 'x'){                             //      while the previous cell isn't an 'x', keep going...
+                               
+                               if(currentCell.prevCell == 'u'){                        //      if the pointer to the previous cell is 'u', go up in the
+                                       seqAaln = '-' + seqAaln;                                //      matrix.  this indicates that we need to insert a gap in
+                                       seqBaln = seqB[row] + seqBaln;                  //      seqA and a base in seqB
+                                       currentCell = alignment[--row][column];
+                               }
+                               else if(currentCell.prevCell == 'l'){           //      if the pointer to the previous cell is 'l', go to the left
+                                       seqBaln = '-' + seqBaln;                                //      in the matrix.  this indicates that we need to insert a gap
+                                       seqAaln = seqA[column] + seqAaln;               //      in seqB and a base in seqA
+                                       currentCell = alignment[row][--column];
+                               }
+                               else{
+                                       seqAaln = seqA[column] + seqAaln;               //      otherwise we need to go diagonally up and to the left,
+                                       seqBaln = seqB[row] + seqBaln;                  //      here we add a base to both alignments
+                                       currentCell = alignment[--row][--column];
+                               }
                        }
                }
+               
+               pairwiseLength = seqAaln.length();
+               seqAstart = 1;  seqAend = 0;
+               seqBstart = 1;  seqBend = 0;
+               
+               for(int i=0;i<seqAaln.length();i++){
+                       if(seqAaln[i] != '-' && seqBaln[i] == '-')              {       seqAstart++;    }
+                       else if(seqAaln[i] == '-' && seqBaln[i] != '-') {       seqBstart++;    }
+                       else                                                                                    {       break;                  }
+               }
+               
+               pairwiseLength -= (seqAstart + seqBstart - 2);
+               
+               for(int i=seqAaln.length()-1; i>=0;i--){
+                       if(seqAaln[i] != '-' && seqBaln[i] == '-')              {       seqAend++;              }
+                       else if(seqAaln[i] == '-' && seqBaln[i] != '-') {       seqBend++;              }
+                       else                                                                                    {       break;                  }
+               }
+               pairwiseLength -= (seqAend + seqBend);
+               
+               seqAend = seqA.length() - seqAend - 1;
+               seqBend = seqB.length() - seqBend - 1;
        }
-       
-       pairwiseLength = seqAaln.length();
-       seqAstart = 1;  seqAend = 0;
-       seqBstart = 1;  seqBend = 0;
-       
-       for(int i=0;i<seqAaln.length();i++){
-               if(seqAaln[i] != '-' && seqBaln[i] == '-')              {       seqAstart++;    }
-               else if(seqAaln[i] == '-' && seqBaln[i] != '-') {       seqBstart++;    }
-               else                                                                                    {       break;                  }
-       }
-       
-       pairwiseLength -= (seqAstart + seqBstart - 2);
-       
-       for(int i=seqAaln.length()-1; i>=0;i--){
-               if(seqAaln[i] != '-' && seqBaln[i] == '-')              {       seqAend++;              }
-               else if(seqAaln[i] == '-' && seqBaln[i] != '-') {       seqBend++;              }
-               else                                                                                    {       break;                  }
+       catch(exception& e) {
+               errorOut(e, "Alignment", "traceBack");
+               exit(1);
        }
-       pairwiseLength -= (seqAend + seqBend);
-
-       seqAend = seqA.length() - seqAend - 1;
-       seqBend = seqB.length() - seqBend - 1;
-
 }
 /**************************************************************************************************/
 
index 215e4b32f77e1fc21df19ffd61b493b84d89acc6..77b650cab75b8f9bbbd4241f851ccf48da556c3f 100644 (file)
@@ -26,6 +26,7 @@ public:
        virtual ~Alignment();
        virtual void align(string, string) = 0;
        
+       
 //     float getAlignmentScore();
        string getSeqAAln();
        string getSeqBAln();
@@ -33,7 +34,7 @@ public:
        int getCandidateEndPos();
        int getTemplateStartPos();
        int getTemplateEndPos();
-
+       
        int getPairwiseLength();
 //     int getLongestTemplateGap();
 
index 3f28e5b9873bc2a8d16cc68d266b42963ccbc461..05e93d755e3948831ef1576cd6028771b9f6a537 100644 (file)
@@ -39,6 +39,8 @@ class Bellerophon : public Chimera {
                void getChimeras();
                void print(ostream&);
                
+               void setCons(string){};
+               
                
        private:
                Dist* distCalculator;
index 696c134375785698c1c15649fef4adc2df783a4a..94bce82936a5e05404ee66e4eda6d5cad282f3b3 100644 (file)
@@ -20,6 +20,8 @@ public:
        BlastAlignment(float, float, float, float);
        ~BlastAlignment();
        void align(string, string);
+       void setMatrix(int){};
+       
 private:
 
        string candidateFileName;
index b601b51f727490aeb3804f23b2aaa20ef1fad135..24521f570e4124a0efa56d1d55c75ed510770728 100644 (file)
--- a/chao1.cpp
+++ b/chao1.cpp
@@ -19,7 +19,7 @@ EstOutput Chao1::getValues(SAbundVector* rank){
                double singles = (double)rank->get(1);
                double doubles = (double)rank->get(2);
                double chaovar = 0.0000;
-       
+//cout << "singles = " << singles << " doubles = " << doubles << " sobs = " << sobs << endl;   
                double chao = sobs + singles*(singles-1)/(2*(doubles+1));
        
                if(singles > 0 && doubles > 0){
index 07ad92fb2fcc8d9f2d863f45cba2552d3e3833d4..e0e8944c6935afa6441b16c052b89b1fe2a51994 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -26,13 +26,16 @@ class Chimera {
        
                Chimera(){};
                Chimera(string);
+               Chimera(string, string);
                virtual ~Chimera(){};
                virtual void setFilter(bool f)                  {       filter = f;                     }
                virtual void setCorrection(bool c)              {       correction = c;         }
                virtual void setProcessors(int p)               {       processors = p;         }
                virtual void setWindow(int w)                   {       window = w;                     }
                virtual void setIncrement(int i)                {       increment = i;          }
-               virtual void setTemplate(string t)              {       templateFile = t;       }
+               
+               virtual void setCons(string) {};
+               
                
                //pure functions
                virtual void getChimeras() = 0; 
@@ -42,8 +45,7 @@ class Chimera {
                
                bool filter, correction;
                int processors, window, increment;
-               string templateFile;
-       
+                       
 
 };
 
index 105fd11b90afd8ce700bb3b20b66bcc9b39bdbb2..28e3e6a72d22e787b796fa04b8edda00eca560f8 100644 (file)
@@ -22,7 +22,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -44,6 +44,10 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        if (templatefile == "not open") { abort = true; }
                        else if (templatefile == "not found") { templatefile = "";  }   
                        
+                       consfile = validParameter.validFile(parameters, "conservation", true);
+                       if (consfile == "not open") { abort = true; }
+                       else if (consfile == "not found") { consfile = "";  }   
+                       
 
                        string temp;
                        temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "T"; }
@@ -58,12 +62,12 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
                        convert(temp, window);
                                        
-                       temp = validParameter.validFile(parameters, "increment", false);                        if (temp == "not found") { temp = "10"; }
+                       temp = validParameter.validFile(parameters, "increment", false);                        if (temp == "not found") { temp = "25"; }
                        convert(temp, increment);
                                
                        method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "pintail"; }
                        
-                       if ((method == "pintail") && (templatefile == "")) { mothurOut("You must provide a template file with the pintail method."); mothurOutEndLine(); abort = true;  }
+                       if ((method == "pintail") && (templatefile == "") && (consfile == "")) { mothurOut("You must provide a template or conservation file with the pintail method."); mothurOutEndLine(); abort = true;  }
                        
 
                }
@@ -105,9 +109,11 @@ int ChimeraSeqsCommand::execute(){
                
                if (abort == true) { return 0; }
                
-               if (method == "bellerophon")    {       chimera = new Bellerophon(fastafile);   }
-               else if (method == "pintail")   {       chimera = new Pintail(fastafile);               }
-               else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;          }
+               if (method == "bellerophon")    {               chimera = new Bellerophon(fastafile);                   }
+               else if (method == "pintail")   {               chimera = new Pintail(fastafile, templatefile); 
+                       if (consfile != "")                     {               chimera->setCons(consfile);                                             }
+                       else                                            {               chimera->setCons("");                                                   }
+               }else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;         }
                
                //set user options
                chimera->setFilter(filter);
@@ -115,8 +121,7 @@ int ChimeraSeqsCommand::execute(){
                chimera->setProcessors(processors);
                chimera->setWindow(window);
                chimera->setIncrement(increment);
-               chimera->setTemplate(templatefile); 
-               
+                               
                //find chimeras
                chimera->getChimeras();
                
@@ -139,6 +144,5 @@ int ChimeraSeqsCommand::execute(){
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 
index dc7df8fad969d33a2fa51f46718f5aa4812321f1..9bc8418bcab65820f56b13959c6cbba0110ea935 100644 (file)
@@ -28,7 +28,7 @@ public:
 private:
        
        bool abort;
-       string method, fastafile, templatefile;
+       string method, fastafile, templatefile, consfile;
        bool filter, correction;
        int processors, midpoint, averageLeft, averageRight, window, iters, increment;
        Chimera* chimera;
index 5fc9b676f128dd0a1d6999918b4fcb42357c407e..534747ac9208df062efc2bf2149430ff417d4a72 100644 (file)
@@ -15,7 +15,8 @@
 
 Database::Database(string fastaFileName){              //      This assumes that the template database is in fasta format, may 
                                                                                                //      need to alter this in the future?
-
+       longest = 0;
+       
        ifstream fastaFile;
        openInputFile(fastaFileName, fastaFile);
        
@@ -43,6 +44,13 @@ Database::Database(string fastaFileName){            //      This assumes that the template dat
                        }
                }
                templateSequences[i] = Sequence(seqName, aligned);
+               
+               //necessary because Sequence constructor by default sets whatever it reads to unaligned
+               //the setUnaligned function remove gap char.
+               templateSequences[i].setUnaligned(templateSequences[i].getUnaligned());
+               
+               if (templateSequences[i].getUnaligned().length() > longest)  { longest = templateSequences[i].getUnaligned().length(); }
+               
                fastaFile.putback(letter);
        }
        
@@ -64,4 +72,10 @@ Database::~Database(){
 
 float Database::getSearchScore()       {       return searchScore;             }       //      we're assuming that the search is already done
 
+
 /**************************************************************************************************/
+
+int Database::getLongestBase() {       return longest+1;               }       
+
+/**************************************************************************************************/
+
index 02cad03d0e34e33fac8324541266758d91f8693d..01393c130adb3250ffaf5c9ed97fd8a2d4043c1e 100644 (file)
@@ -23,8 +23,10 @@ public:
        virtual ~Database();
        virtual Sequence findClosestSequence(Sequence*) = 0;
        virtual float getSearchScore();
+       virtual int getLongestBase(); 
+       
 protected:
-       int numSeqs;
+       int numSeqs, longest;
        float searchScore;
        vector<Sequence> templateSequences;
 };
index 2215e081af76c8c2ccb3064aebe8d04dc6a94818..95c0ff03f0f05145a9435b8deb06e20bd72fdc8f 100644 (file)
@@ -244,13 +244,21 @@ bool ScriptEngine::getInput(){
 string ScriptEngine::getNextCommand(string& commandString) {
        try {
                string nextcommand = "";
+               int count = 0;
                
-               nextcommand = commandString.substr(0,commandString.find_first_of(';'));
-
+               //go through string until you reach ; or end
+               while (count < commandString.length()) { 
+                       
+                       if (commandString[count] == ';') {  break;   }
+                       else {          nextcommand += commandString[count];    }
+                       
+                       count++;
+               }
+               
+               //if you are not at the end
+               if (count != commandString.length())  {   commandString = commandString.substr(count+1, commandString.length());  }
+               else { commandString = ""; }
                                
-               if ((commandString.find_first_of(';')+1) <= commandString.length()) {
-                       commandString = commandString.substr(commandString.find_first_of(';')+1, commandString.length());
-               }else { commandString = ""; } //you have reached the last command.
                
                //get rid of spaces in between commands if any
                if (commandString.length() > 0) {
index e3be6c31c83ceff26c027546f5fc8f2ebf572ce5..f6fef1d6baeac3e950c5afbfa94ce4859496eb44 100644 (file)
 GotohOverlap::GotohOverlap(float gO, float gE, float m, float mm, int r) :
        gapOpen(gO), gapExtend(gE), match(m), mismatch(mm), Alignment(r) {
        
-       for(int i=1;i<nCols;i++){                               //      we initialize the dynamic programming matrix by setting the pointers in
-               alignment[0][i].prevCell = 'l';         //      the first row to the left
-               alignment[0][i].cValue = 0;
-               alignment[0][i].dValue = 0;
+       try {
+               for(int i=1;i<nCols;i++){                               //      we initialize the dynamic programming matrix by setting the pointers in
+                       alignment[0][i].prevCell = 'l';         //      the first row to the left
+                       alignment[0][i].cValue = 0;
+                       alignment[0][i].dValue = 0;
+               }
+               
+               for(int i=1;i<nRows;i++){                               //      we initialize the dynamic programming matrix by setting the pointers in
+                       alignment[i][0].prevCell = 'u';         //      the first column upward
+                       alignment[i][0].cValue = 0;
+                       alignment[i][0].iValue = 0;
+               }
+               
        }
-       
-       for(int i=1;i<nRows;i++){                               //      we initialize the dynamic programming matrix by setting the pointers in
-               alignment[i][0].prevCell = 'u';         //      the first column upward
-               alignment[i][0].cValue = 0;
-               alignment[i][0].iValue = 0;
+       catch(exception& e) {
+               errorOut(e, "GotohOverlap", "GotohOverlap");
+               exit(1);
        }
 }
 
 /**************************************************************************************************/
 
 void GotohOverlap::align(string A, string B){
-       
-       seqA = ' ' + A; lA = seqA.length();             //      the algorithm requires that the first character be a dummy value
-       seqB = ' ' + B; lB = seqB.length();             //      the algorithm requires that the first character be a dummy value
-       
-       for(int i=1;i<lB;i++){                                  //      the recursion here is shown in Webb and Miller, Fig. 1A.  Note that 
-               for(int j=1;j<lA;j++){                          //      if we need to conserve on space we should see Fig. 1B, which is linear
-                                                                                       //      in space, which I think is unnecessary
-                       float diagonal;
-                       if(seqB[i] == seqA[j])  {       diagonal = alignment[i-1][j-1].cValue + match;          }
-                       else                                    {       diagonal = alignment[i-1][j-1].cValue + mismatch;       }
-                       
-                       alignment[i][j].iValue = max(alignment[i][j-1].iValue, alignment[i][j-1].cValue + gapOpen) + gapExtend;
-                       alignment[i][j].dValue = max(alignment[i-1][j].dValue, alignment[i-1][j].cValue + gapOpen) + gapExtend;
-                       
-                       if(alignment[i][j].iValue > alignment[i][j].dValue){
-                               if(alignment[i][j].iValue > diagonal){
-                                       alignment[i][j].cValue = alignment[i][j].iValue;
-                                       alignment[i][j].prevCell = 'l';
+       try {
+               seqA = ' ' + A; lA = seqA.length();             //      the algorithm requires that the first character be a dummy value
+               seqB = ' ' + B; lB = seqB.length();             //      the algorithm requires that the first character be a dummy value
+               
+               for(int i=1;i<lB;i++){                                  //      the recursion here is shown in Webb and Miller, Fig. 1A.  Note that 
+                       for(int j=1;j<lA;j++){                          //      if we need to conserve on space we should see Fig. 1B, which is linear
+                               //      in space, which I think is unnecessary
+                               float diagonal;
+                               if(seqB[i] == seqA[j])  {       diagonal = alignment[i-1][j-1].cValue + match;          }
+                               else                                    {       diagonal = alignment[i-1][j-1].cValue + mismatch;       }
+                               
+                               alignment[i][j].iValue = max(alignment[i][j-1].iValue, alignment[i][j-1].cValue + gapOpen) + gapExtend;
+                               alignment[i][j].dValue = max(alignment[i-1][j].dValue, alignment[i-1][j].cValue + gapOpen) + gapExtend;
+                               
+                               if(alignment[i][j].iValue > alignment[i][j].dValue){
+                                       if(alignment[i][j].iValue > diagonal){
+                                               alignment[i][j].cValue = alignment[i][j].iValue;
+                                               alignment[i][j].prevCell = 'l';
+                                       }
+                                       else{
+                                               alignment[i][j].cValue = diagonal;
+                                               alignment[i][j].prevCell = 'd';
+                                       }
                                }
                                else{
-                                       alignment[i][j].cValue = diagonal;
-                                       alignment[i][j].prevCell = 'd';
+                                       if(alignment[i][j].dValue > diagonal){
+                                               alignment[i][j].cValue = alignment[i][j].dValue;
+                                               alignment[i][j].prevCell = 'u';
+                                       }
+                                       else{
+                                               alignment[i][j].cValue = diagonal;
+                                               alignment[i][j].prevCell = 'd';
+                                       }
                                }
+                               
                        }
-                       else{
-                               if(alignment[i][j].dValue > diagonal){
-                                       alignment[i][j].cValue = alignment[i][j].dValue;
-                                       alignment[i][j].prevCell = 'u';
-                               }
-                               else{
-                                       alignment[i][j].cValue = diagonal;
-                                       alignment[i][j].prevCell = 'd';
-                               }
-                       }
-                       
                }
+               Overlap over;
+               over.setOverlap(alignment, lA, lB, 0);  //      Fix the gaps at the ends of the sequences
+               traceBack();                                                    //      Construct the alignment and set seqAaln and seqBaln
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "GotohOverlap", "align");
+               exit(1);
        }
-       Overlap over;
-       over.setOverlap(alignment, lA, lB, 0);  //      Fix the gaps at the ends of the sequences
-       traceBack();                                                    //      Construct the alignment and set seqAaln and seqBaln
 }
 
 /**************************************************************************************************/
index 85176efc4c86031a60034910196f0a1a56fc5e45..0b7d8ea0f008f30ab4d4cb36d1c7ec22713c3dde 100644 (file)
@@ -30,6 +30,7 @@ class GotohOverlap : public Alignment {
 public:
        GotohOverlap(float, float, float, float, int);
        void align(string, string);
+       
        ~GotohOverlap() {}
        
 private:
index 0ec822f763feb3869a3afe63fecd78a71dec3e82..86dba09e64b42bb6480b4e5a2d5b99ab88df8303 100644 (file)
 /**************************************************************************************************/
 
 KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerSize(kSize) {
-
-       string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
-       ifstream kmerFileTest(kmerDBName.c_str());
-       
-       int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
-       
-       maxKmer = power4s[kmerSize];
-       kmerLocations.resize(maxKmer+1);
+       try { 
        
-       if(!kmerFileTest){              //      if we can open the kmer db file, then read it in...
-               mothurOut("Generating the " + kmerDBName + " database...\t");   cout.flush();
-               generateKmerDB(kmerDBName);     
-       }
-       else{                                   //      ...otherwise generate it.
-               mothurOut("Reading in the " + kmerDBName + " database...\t");   cout.flush();
-               readKmerDB(kmerDBName, kmerFileTest);
+               string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+               ifstream kmerFileTest(kmerDBName.c_str());
+               
+               int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 };
+               
+               maxKmer = power4s[kmerSize];
+               kmerLocations.resize(maxKmer+1);
+               
+               if(!kmerFileTest){              //      if we can open the kmer db file, then read it in...
+                       mothurOut("Generating the " + kmerDBName + " database...\t");   cout.flush();
+                       generateKmerDB(kmerDBName);     
+               }
+               else{                                   //      ...otherwise generate it.
+                       mothurOut("Reading in the " + kmerDBName + " database...\t");   cout.flush();
+                       readKmerDB(kmerDBName, kmerFileTest);
+               }
+               mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
+               
        }
-       mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
+       catch(exception& e) {
+               errorOut(e, "KmerDB", "KmerDB");
+               exit(1);
+       }       
 
 }
 /**************************************************************************************************/
@@ -61,92 +68,114 @@ KmerDB::~KmerDB(){
 /**************************************************************************************************/
 
 Sequence KmerDB::findClosestSequence(Sequence* candidateSeq){
+       try {
        
-       Kmer kmer(kmerSize);
-       
-       searchScore = 0;
-       int maxSequence = 0;
-
-       vector<int> matches(numSeqs, 0);                                                //      a record of the sequences with shared kmers
-       vector<int> timesKmerFound(kmerLocations.size()+1, 0);  //      a record of the kmers that we have already found
-
-       int numKmers = candidateSeq->getNumBases() - kmerSize + 1;      
-
-       for(int i=0;i<numKmers;i++){
-               int kmerNumber = kmer.getKmerNumber(candidateSeq->getUnaligned(), i);           //      go through the query sequence and get a kmer number
-               if(timesKmerFound[kmerNumber] == 0){                            //      if we haven't seen it before...
-                       for(int j=0;j<kmerLocations[kmerNumber].size();j++){//increase the count for each sequence that also has
-                               matches[kmerLocations[kmerNumber][j]]++;        //      that kmer
+               Kmer kmer(kmerSize);
+               
+               searchScore = 0;
+               int maxSequence = 0;
+               
+               vector<int> matches(numSeqs, 0);                                                //      a record of the sequences with shared kmers
+               vector<int> timesKmerFound(kmerLocations.size()+1, 0);  //      a record of the kmers that we have already found
+               
+               int numKmers = candidateSeq->getNumBases() - kmerSize + 1;      
+               
+               for(int i=0;i<numKmers;i++){
+                       int kmerNumber = kmer.getKmerNumber(candidateSeq->getUnaligned(), i);           //      go through the query sequence and get a kmer number
+                       if(timesKmerFound[kmerNumber] == 0){                            //      if we haven't seen it before...
+                               for(int j=0;j<kmerLocations[kmerNumber].size();j++){//increase the count for each sequence that also has
+                                       matches[kmerLocations[kmerNumber][j]]++;        //      that kmer
+                               }
                        }
+                       timesKmerFound[kmerNumber] = 1;                                         //      ok, we've seen the kmer now
                }
-               timesKmerFound[kmerNumber] = 1;                                         //      ok, we've seen the kmer now
-       }
-
-       for(int i=0;i<numSeqs;i++){                                                             //      find the db sequence that shares the most kmers with
-               if(matches[i] > searchScore){                                   //      the query sequence
-                       searchScore = matches[i];
-                       maxSequence = i;
+               
+               for(int i=0;i<numSeqs;i++){                                                             //      find the db sequence that shares the most kmers with
+                       if(matches[i] > searchScore){                                   //      the query sequence
+                               searchScore = matches[i];
+                               maxSequence = i;
+                       }
                }
+               
+               searchScore = 100 * searchScore / (float) numKmers;             //      return the Sequence object corresponding to the db
+               
+               return templateSequences[maxSequence];                                  //      sequence with the most shared kmers.
+               
        }
-
-       searchScore = 100 * searchScore / (float) numKmers;             //      return the Sequence object corresponding to the db
-       return templateSequences[maxSequence];                                  //      sequence with the most shared kmers.
+       catch(exception& e) {
+               errorOut(e, "KmerDB", "findClosestSequence");
+               exit(1);
+       }       
 }
 
 /**************************************************************************************************/
 
 void KmerDB::generateKmerDB(string kmerDBName){
+       try {
        
-       Kmer kmer(kmerSize);
-       
-       for(int i=0;i<numSeqs;i++){                                                             //      for all of the template sequences...
-
-               string seq = templateSequences[i].getUnaligned();       //      ...take the unaligned sequence...
-               int numKmers = seq.length() - kmerSize + 1;
-               
-               vector<int> seenBefore(maxKmer+1,0);
-               for(int j=0;j<numKmers;j++){                                            //      ...step though the sequence and get each kmer...
-                       int kmerNumber = kmer.getKmerNumber(seq, j);
-                       if(seenBefore[kmerNumber] == 0){
-                               kmerLocations[kmerNumber].push_back(i);         //      ...insert the sequence index into kmerLocations for
-                       }                                                                                               //      the appropriate kmer number
-                       seenBefore[kmerNumber] = 1;
-               }                                                                                                       
-       }
-       
-       ofstream kmerFile;                                                                              //      once we have the kmerLocations folder print it out
-       openOutputFile(kmerDBName, kmerFile);                                   //      to a file
-       
-       for(int i=0;i<maxKmer;i++){                                                             //      step through all of the possible kmer numbers
-               kmerFile << i << ' ' << kmerLocations[i].size();        //      print the kmer number and the number of sequences with
-               for(int j=0;j<kmerLocations[i].size();j++){                     //      that kmer.  then print out the indices of the sequences
-                       kmerFile << ' ' << kmerLocations[i][j];                 //      with that kmer.
+               Kmer kmer(kmerSize);
+               
+               for(int i=0;i<numSeqs;i++){                                                             //      for all of the template sequences...
+                       
+                       string seq = templateSequences[i].getUnaligned();       //      ...take the unaligned sequence...
+                       int numKmers = seq.length() - kmerSize + 1;
+                       
+                       vector<int> seenBefore(maxKmer+1,0);
+                       for(int j=0;j<numKmers;j++){                                            //      ...step though the sequence and get each kmer...
+                               int kmerNumber = kmer.getKmerNumber(seq, j);
+                               if(seenBefore[kmerNumber] == 0){
+                                       kmerLocations[kmerNumber].push_back(i);         //      ...insert the sequence index into kmerLocations for
+                               }                                                                                               //      the appropriate kmer number
+                               seenBefore[kmerNumber] = 1;
+                       }                                                                                                       
                }
-               kmerFile << endl;
+               
+               ofstream kmerFile;                                                                              //      once we have the kmerLocations folder print it out
+               openOutputFile(kmerDBName, kmerFile);                                   //      to a file
+               
+               for(int i=0;i<maxKmer;i++){                                                             //      step through all of the possible kmer numbers
+                       kmerFile << i << ' ' << kmerLocations[i].size();        //      print the kmer number and the number of sequences with
+                       for(int j=0;j<kmerLocations[i].size();j++){                     //      that kmer.  then print out the indices of the sequences
+                               kmerFile << ' ' << kmerLocations[i][j];                 //      with that kmer.
+                       }
+                       kmerFile << endl;
+               }
+               kmerFile.close();
+               
        }
-       kmerFile.close();
+       catch(exception& e) {
+               errorOut(e, "KmerDB", "generateKmerDB");
+               exit(1);
+       }       
        
 }
 
 /**************************************************************************************************/
 
 void KmerDB::readKmerDB(string kmerDBName, ifstream& kmerDBFile){
-
-       kmerDBFile.seekg(0);                                                                    //      start at the beginning of the file
-       
-       string seqName;
-       int seqNumber;
+       try {
        
-       for(int i=0;i<maxKmer;i++){
-               int numValues;  
-               kmerDBFile >> seqName >> numValues;
+               kmerDBFile.seekg(0);                                                                    //      start at the beginning of the file
+               
+               string seqName;
+               int seqNumber;
                
-               for(int j=0;j<numValues;j++){                                           //      for each kmer number get the...
-                       kmerDBFile >> seqNumber;                                                //              1. number of sequences with the kmer number
-                       kmerLocations[i].push_back(seqNumber);                  //              2. sequence indices
+               for(int i=0;i<maxKmer;i++){
+                       int numValues;  
+                       kmerDBFile >> seqName >> numValues;
+                       
+                       for(int j=0;j<numValues;j++){                                           //      for each kmer number get the...
+                               kmerDBFile >> seqNumber;                                                //              1. number of sequences with the kmer number
+                               kmerLocations[i].push_back(seqNumber);                  //              2. sequence indices
+                       }
                }
+               kmerDBFile.close();
+               
        }
-       kmerDBFile.close();
+       catch(exception& e) {
+               errorOut(e, "KmerDB", "readKmerDB");
+               exit(1);
+       }       
 }
 
 /**************************************************************************************************/
index 0741ccd7ad444a250fa1dec5b23790771142a99c..4aed5903aec4985f92d5668d39301cabe3d779e6 100644 (file)
--- a/nast.cpp
+++ b/nast.cpp
 /**************************************************************************************************/
 
 Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method), candidateSeq(cand), templateSeq(temp) {
-       maxInsertLength = 0;
-       pairwiseAlignSeqs();    //      This is part A in Fig. 2 of DeSantis et al.
-       regapSequences();               //      This is parts B-F in Fig. 2 of DeSantis et al.
+       try {
+               maxInsertLength = 0;
+               pairwiseAlignSeqs();    //      This is part A in Fig. 2 of DeSantis et al.
+               regapSequences();               //      This is parts B-F in Fig. 2 of DeSantis et al.
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Nast", "Nast");
+               exit(1);
+       }
+
 }
 
 /**************************************************************************************************/
 
 void Nast::pairwiseAlignSeqs(){        //      Here we call one of the pairwise alignment methods to align our unaligned candidate
                                                                //      and template sequences
-       alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
+       try {
+               
+               alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
        
-       string candAln = alignment->getSeqAAln();
-       string tempAln = alignment->getSeqBAln();
+               string candAln = alignment->getSeqAAln();
+               string tempAln = alignment->getSeqBAln();
 
-       if(candAln == ""){
-               candidateSeq->setPairwise("");
-               templateSeq->setPairwise(templateSeq->getUnaligned());
-       }
-       else{
-               if(tempAln[0] == '-'){
-                       int pairwiseAlignmentLength = tempAln.length(); //      we need to make sure that the candidate sequence alignment
-                       for(int i=0;i<pairwiseAlignmentLength;i++){             //      starts where the template sequence alignment starts, if it
-                               if(isalpha(tempAln[i])){                                        //      starts early, we nuke the beginning of the candidate
-                                       candAln = candAln.substr(i);                    //      sequence
-                                       tempAln = tempAln.substr(i);
-                                       break;
+               if(candAln == ""){
+
+                       candidateSeq->setPairwise("");
+                       templateSeq->setPairwise(templateSeq->getUnaligned());
+
+               }
+               else{
+
+                       if(tempAln[0] == '-'){
+                               int pairwiseAlignmentLength = tempAln.length(); //      we need to make sure that the candidate sequence alignment
+                               for(int i=0;i<pairwiseAlignmentLength;i++){             //      starts where the template sequence alignment starts, if it
+                                       if(isalpha(tempAln[i])){                                        //      starts early, we nuke the beginning of the candidate
+                                               candAln = candAln.substr(i);                    //      sequence
+                                               tempAln = tempAln.substr(i);
+                                               break;
+                                       }
                                }
                        }
-               }
-               
-               int pairwiseAlignmentLength = tempAln.length();
-               if(tempAln[pairwiseAlignmentLength-1] == '-'){          //      we need to make sure that the candidate sequence alignment
-                       for(int i=pairwiseAlignmentLength-1; i>=0; i--){//      ends where the template sequence alignment ends, if it runs
-                               if(isalpha(tempAln[i])){                                        //      long, we nuke the end of the candidate sequence
-                                       candAln = candAln.substr(0,i+1);
-                                       tempAln = tempAln.substr(0,i+1);
-                                       break;
-                               }               
+                       
+                       int pairwiseAlignmentLength = tempAln.length();
+                       if(tempAln[pairwiseAlignmentLength-1] == '-'){          //      we need to make sure that the candidate sequence alignment
+                               for(int i=pairwiseAlignmentLength-1; i>=0; i--){//      ends where the template sequence alignment ends, if it runs
+                                       if(isalpha(tempAln[i])){                                        //      long, we nuke the end of the candidate sequence
+                                               candAln = candAln.substr(0,i+1);
+                                               tempAln = tempAln.substr(0,i+1);
+                                               break;
+                                       }               
+                               }
                        }
+
                }
+
+               candidateSeq->setPairwise(candAln);                                     //      set the pairwise sequences in the Sequence objects for
+               templateSeq->setPairwise(tempAln);                                      //      the candidate and template sequences
+
        }
-       candidateSeq->setPairwise(candAln);                                     //      set the pairwise sequences in the Sequence objects for
-       templateSeq->setPairwise(tempAln);                                      //      the candidate and template sequences
-       
+       catch(exception& e) {
+               errorOut(e, "Nast", "pairwiseAlignSeqs");
+               exit(1);
+       }       
 }
 
 /**************************************************************************************************/
@@ -73,274 +93,301 @@ void Nast::pairwiseAlignSeqs(){   //      Here we call one of the pairwise alignment me
 void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAlign){
 
 //     here we do steps C-F of Fig. 2 from DeSantis et al.
+       try {
        
-       int longAlignmentLength = newTemplateAlign.length();    
-
-       for(int i=0; i<longAlignmentLength; i++){                               //      use the long alignment as the standard
-               int rightIndex, rightRoom, leftIndex, leftRoom;
-
-               //      Part C of Fig. 2 from DeSantis et al.
-               if((isalpha(newTemplateAlign[i]) != isalpha(tempAln[i]))){      //if there is a discrepancy between the regapped
-                       
-
-                       
-                       //      Part D of Fig. 2 from DeSantis et al.           //      template sequence and the official template sequence
-                       for(leftIndex=i-1;leftIndex>=0;leftIndex--){    //      then we've got problems...
-                               if(!isalpha(candAln[leftIndex])){
-                                       leftRoom = 1;   //count how far it is to the nearest gap on the LEFT side of the anomaly
-                                       while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom]))   {       leftRoom++;             }
-                                       break;
-                               }
-                       }
-
-                       
-                       for(rightIndex=i+1;rightIndex<longAlignmentLength;rightIndex++){
-                               if(!isalpha(candAln[rightIndex])){
-                                       rightRoom = 1;  //count how far it is to the nearest gap on the RIGHT side of the anomaly
-                                       while(rightIndex+rightRoom<longAlignmentLength && !isalpha(candAln[rightIndex+rightRoom]))      {       rightRoom++;    }
-                                       break;
-                               }
-                       }
-                       
-                       int insertLength = 0;                                                   //      figure out how long the anomaly is
-                       while(!isalpha(newTemplateAlign[i + insertLength]))     {       insertLength++; }
-                       if(insertLength > maxInsertLength){     maxInsertLength = insertLength; }
+               int longAlignmentLength = newTemplateAlign.length();    
+               
+               for(int i=0; i<longAlignmentLength; i++){                               //      use the long alignment as the standard
+                       int rightIndex, rightRoom, leftIndex, leftRoom;
                        
-                       if((leftRoom + rightRoom) >= insertLength){
-                               //      Parts D & E from Fig. 2 of DeSantis et al.
-                               if((i-leftIndex) <= (rightIndex-i)){            //      the left gap is closer - > move stuff left there's
-                                       if(leftRoom >= insertLength){                   //      enough room to the left to move
-                                               string leftTemplateString = newTemplateAlign.substr(0,i);
-                                               string rightTemplateString = newTemplateAlign.substr(i+insertLength);
-                                               newTemplateAlign = leftTemplateString + rightTemplateString;
-                                               longAlignmentLength = newTemplateAlign.length();
-                                               
-                                               string leftCandidateString = candAln.substr(0,leftIndex-insertLength+1);
-                                               string rightCandidateString = candAln.substr(leftIndex+1);
-                                               candAln = leftCandidateString + rightCandidateString;   
+                       //      Part C of Fig. 2 from DeSantis et al.
+                       if((isalpha(newTemplateAlign[i]) != isalpha(tempAln[i]))){      //if there is a discrepancy between the regapped
+                               
+                               rightRoom = 0; leftRoom = 0;
+                               
+                               //      Part D of Fig. 2 from DeSantis et al.           //      template sequence and the official template sequence
+                               for(leftIndex=i-1;leftIndex>0;leftIndex--){     //      then we've got problems...
+                                       if(!isalpha(candAln[leftIndex])){
+                                               leftRoom = 1;   //count how far it is to the nearest gap on the LEFT side of the anomaly
+                                               while(leftIndex-leftRoom>=0 && !isalpha(candAln[leftIndex-leftRoom]))   {       leftRoom++;             }
+                                               break;
                                        }
-                                       else{                                                                   //      not enough room to the left, have to steal some space to
-                                               string leftTemplateString = newTemplateAlign.substr(0,i);       //      the right
-                                               string rightTemplateString = newTemplateAlign.substr(i+insertLength);
-                                               newTemplateAlign = leftTemplateString + rightTemplateString;
-                                               longAlignmentLength = newTemplateAlign.length();
-                                               
-                                               string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
-                                               string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
-                                               string rightCandidateString = candAln.substr(rightIndex+(insertLength-leftRoom));
-                                               candAln = leftCandidateString + insertString + rightCandidateString;    
+                               }
+                               
+                               
+                               for(rightIndex=i+1;rightIndex<longAlignmentLength-1;rightIndex++){
+                                       if(!isalpha(candAln[rightIndex])){
+                                               rightRoom = 1;  //count how far it is to the nearest gap on the RIGHT side of the anomaly
+                                               while(rightIndex+rightRoom<longAlignmentLength && !isalpha(candAln[rightIndex+rightRoom]))      {       rightRoom++;    }
+                                               break;
                                        }
                                }
-                               else{                                                                           //      the right gap is closer - > move stuff right there's
-                                       if(rightRoom >= insertLength){                  //      enough room to the right to move
-                                               string leftTemplateString = newTemplateAlign.substr(0,i);
-                                               string rightTemplateString = newTemplateAlign.substr(i+insertLength);
-                                               newTemplateAlign = leftTemplateString + rightTemplateString;
-                                               longAlignmentLength = newTemplateAlign.length();
-                                               
-                                               string leftCandidateString = candAln.substr(0,rightIndex);
-                                               string rightCandidateString = candAln.substr(rightIndex+insertLength);
-                                               candAln = leftCandidateString + rightCandidateString;   
-                                               
+                                       
+                               int insertLength = 0;                                                   //      figure out how long the anomaly is
+                               while(!isalpha(newTemplateAlign[i + insertLength]))     {       insertLength++; }
+                               if(insertLength > maxInsertLength){     maxInsertLength = insertLength; }
+                                       
+                               if((leftRoom + rightRoom) >= insertLength){
+       
+                                       //      Parts D & E from Fig. 2 of DeSantis et al.
+                                       if((i-leftIndex) <= (rightIndex-i)){            //      the left gap is closer - > move stuff left there's
+       
+                                               if(leftRoom >= insertLength){                   //      enough room to the left to move
+       
+                                                       string leftTemplateString = newTemplateAlign.substr(0,i);
+                                                       string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+                                                       newTemplateAlign = leftTemplateString + rightTemplateString;
+                                                       longAlignmentLength = newTemplateAlign.length();
+                                                       
+                                                       string leftCandidateString = candAln.substr(0,leftIndex-insertLength+1);
+                                                       string rightCandidateString = candAln.substr(leftIndex+1);
+                                                       candAln = leftCandidateString + rightCandidateString;
+               
+                                               }
+                                               else{                                                                   //      not enough room to the left, have to steal some space to
+                                                       string leftTemplateString = newTemplateAlign.substr(0,i);       //      the right
+                                                       string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+                                                       newTemplateAlign = leftTemplateString + rightTemplateString;
+                                                       longAlignmentLength = newTemplateAlign.length();
+                                                       
+                                                       string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
+                                                       string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
+                                                       string rightCandidateString = candAln.substr(rightIndex+(insertLength-leftRoom));
+                                                       candAln = leftCandidateString + insertString + rightCandidateString;
+                               
+                                               }
                                        }
-                                       else{                                                                   //      not enough room to the right, have to steal some 
-                                                                                                                       //      space to the left lets move left and then right...
-                                               string leftTemplateString = newTemplateAlign.substr(0,i);
-                                               string rightTemplateString = newTemplateAlign.substr(i+insertLength);
-                                               newTemplateAlign = leftTemplateString + rightTemplateString;
-                                               longAlignmentLength = newTemplateAlign.length();
-                                               
-                                               string leftCandidateString = candAln.substr(0,leftIndex-(insertLength-rightRoom)+1);
-                                               string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
-                                               string rightCandidateString = candAln.substr(rightIndex+rightRoom);
-                                               candAln = leftCandidateString + insertString + rightCandidateString;    
-                                               
+                                       else{                                                                           //      the right gap is closer - > move stuff right there's
+                                               if(rightRoom >= insertLength){                  //      enough room to the right to move
+                       
+                                                       string leftTemplateString = newTemplateAlign.substr(0,i);
+                                                       string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+                                                       newTemplateAlign = leftTemplateString + rightTemplateString;
+                                                       longAlignmentLength = newTemplateAlign.length();
+                                                       
+                                                       string leftCandidateString = candAln.substr(0,rightIndex);
+                                                       string rightCandidateString = candAln.substr(rightIndex+insertLength);
+                                                       candAln = leftCandidateString + rightCandidateString;   
+                                                                       
+                                               }
+                                               else{                                                                   //      not enough room to the right, have to steal some 
+                                                       //      space to the left lets move left and then right...
+                                                       string leftTemplateString = newTemplateAlign.substr(0,i);
+                                                       string rightTemplateString = newTemplateAlign.substr(i+insertLength);
+                                                       newTemplateAlign = leftTemplateString + rightTemplateString;
+                                                       longAlignmentLength = newTemplateAlign.length();
+                                                                       
+                                                       string leftCandidateString = candAln.substr(0,leftIndex-(insertLength-rightRoom)+1);
+                                                       string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
+                                                       string rightCandidateString = candAln.substr(rightIndex+rightRoom);
+                                                       candAln = leftCandidateString + insertString + rightCandidateString;    
+                                                                       
+                                               }
                                        }
                                }
-                       }
-                       else{                                                                                   //      there could be a case where there isn't enough room in
-                               string leftTemplateString = newTemplateAlign.substr(0,i);                       //      either direction to move stuff
-                               string rightTemplateString = newTemplateAlign.substr(i+leftRoom+rightRoom);
-                               newTemplateAlign = leftTemplateString + rightTemplateString;
-                               longAlignmentLength = newTemplateAlign.length();
+                               else{
+                                                                                                       //      there could be a case where there isn't enough room in
+                                       string leftTemplateString = newTemplateAlign.substr(0,i);                       //      either direction to move stuff
+                                       string rightTemplateString = newTemplateAlign.substr(i+leftRoom+rightRoom);
+                                       newTemplateAlign = leftTemplateString + rightTemplateString;
+                                       longAlignmentLength = newTemplateAlign.length();
+                                                       
+                                       string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
+                                       string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
+                                       string rightCandidateString = candAln.substr(rightIndex+rightRoom);
+                                       candAln = leftCandidateString + insertString + rightCandidateString;
+       
+                               }
                                
-                               string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1);
-                               string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1);
-                               string rightCandidateString = candAln.substr(rightIndex+rightRoom);
-                               candAln = leftCandidateString + insertString + rightCandidateString;    
-                       }
-                       
-                       i -= insertLength;
-               } 
+                               i -= insertLength;
+                       } 
+               }
        }
-
+       catch(exception& e) {
+               errorOut(e, "Nast", "removeExtraGaps");
+               exit(1);
+       }       
+       
 }
 
 /**************************************************************************************************/
 
 void Nast::regapSequences(){   //This is essentially part B in Fig 2. of DeSantis et al.
+       try { 
        
-       string candPair = candidateSeq->getPairwise();
-       string candAln = "";
-       
-       string tempPair = templateSeq->getPairwise();
-       string tempAln = templateSeq->getAligned();             //      we use the template aligned sequence as our guide
-       
-       int pairwiseLength = candPair.length();
-       int fullAlignLength = tempAln.length();
-       
-       if(candPair == ""){
-               for(int i=0;i<fullAlignLength;i++)      {       candAln += '.';         }
-               candidateSeq->setAligned(candAln);
-               return;
-       }
-       
-       int fullAlignIndex = 0;
-       int pairwiseAlignIndex = 0;
-       string newTemplateAlign = "";                                   //      this is going to be messy so we want a temporary template
-                                                                                                       //      alignment string
-       while(tempAln[fullAlignIndex] == '.' || tempAln[fullAlignIndex]  == '-'){
-               candAln += '.';                                                         //      add the initial '-' and '.' to the candidate and template
-               newTemplateAlign += tempAln[fullAlignIndex];//  pairwise sequences
-               fullAlignIndex++;
-       }
-       
-       string lastLoop = "";
-       
-       while(pairwiseAlignIndex<pairwiseLength){
-               if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
-                  && isalpha(candPair[pairwiseAlignIndex])){
-                       //  the template and candidate pairwise and template aligned have characters
-                       //      need to add character onto the candidatSeq.aligned sequence
-                       
-                       candAln += candPair[pairwiseAlignIndex];
-                       newTemplateAlign += tempPair[pairwiseAlignIndex];//
-                       
-                       pairwiseAlignIndex++;
-                       fullAlignIndex++;
+               string candPair = candidateSeq->getPairwise();
+               string candAln = "";
+               
+               string tempPair = templateSeq->getPairwise();
+               string tempAln = templateSeq->getAligned();             //      we use the template aligned sequence as our guide
+               
+               int pairwiseLength = candPair.length();
+               int fullAlignLength = tempAln.length();
+               
+               if(candPair == ""){
+                       for(int i=0;i<fullAlignLength;i++)      {       candAln += '.';         }
+                       candidateSeq->setAligned(candAln);
+                       return;
                }
-               else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
-                               && isalpha(candPair[pairwiseAlignIndex])){
-                       //      the template pairwise and candidate pairwise are characters and the template aligned is a gap
-                       //      need to insert gaps into the candidateSeq.aligned sequence
-                       
-                       candAln += '-';
-                       newTemplateAlign += '-';//
+               
+               int fullAlignIndex = 0;
+               int pairwiseAlignIndex = 0;
+               string newTemplateAlign = "";                                   //      this is going to be messy so we want a temporary template
+               //      alignment string
+               while(tempAln[fullAlignIndex] == '.' || tempAln[fullAlignIndex]  == '-'){
+                       candAln += '.';                                                         //      add the initial '-' and '.' to the candidate and template
+                       newTemplateAlign += tempAln[fullAlignIndex];//  pairwise sequences
                        fullAlignIndex++;
                }
-               else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
-                               && isalpha(candPair[pairwiseAlignIndex])){
-                       //  the template pairwise is a gap and the template aligned and pairwise sequences have characters
-                       //      this is the alpha scenario.  add character to the candidateSeq.aligned sequence without progressing
-                       //      further through the tempAln sequence.
-                       
-                       candAln += candPair[pairwiseAlignIndex];
-                       newTemplateAlign += '-';//
-                       pairwiseAlignIndex++;
-               }
-               else if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
-                               && !isalpha(candPair[pairwiseAlignIndex])){
-                       //  the template pairwise and full alignment are characters and the candidate sequence has a gap
-                       //      should not be a big deal, just add the gap position to the candidateSeq.aligned sequence;
-                       
-                       candAln += candPair[pairwiseAlignIndex];
-                       newTemplateAlign += tempAln[fullAlignIndex];//
-                       fullAlignIndex++;                       
-                       pairwiseAlignIndex++;
+               
+               string lastLoop = "";
+               
+               while(pairwiseAlignIndex<pairwiseLength){
+                       if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+                          && isalpha(candPair[pairwiseAlignIndex])){
+                               //  the template and candidate pairwise and template aligned have characters
+                               //      need to add character onto the candidatSeq.aligned sequence
+                               
+                               candAln += candPair[pairwiseAlignIndex];
+                               newTemplateAlign += tempPair[pairwiseAlignIndex];//
+                               
+                               pairwiseAlignIndex++;
+                               fullAlignIndex++;
+                       }
+                       else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
+                                       && isalpha(candPair[pairwiseAlignIndex])){
+                               //      the template pairwise and candidate pairwise are characters and the template aligned is a gap
+                               //      need to insert gaps into the candidateSeq.aligned sequence
+                               
+                               candAln += '-';
+                               newTemplateAlign += '-';//
+                               fullAlignIndex++;
+                       }
+                       else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+                                       && isalpha(candPair[pairwiseAlignIndex])){
+                               //  the template pairwise is a gap and the template aligned and pairwise sequences have characters
+                               //      this is the alpha scenario.  add character to the candidateSeq.aligned sequence without progressing
+                               //      further through the tempAln sequence.
+                               
+                               candAln += candPair[pairwiseAlignIndex];
+                               newTemplateAlign += '-';//
+                               pairwiseAlignIndex++;
+                       }
+                       else if(isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+                                       && !isalpha(candPair[pairwiseAlignIndex])){
+                               //  the template pairwise and full alignment are characters and the candidate sequence has a gap
+                               //      should not be a big deal, just add the gap position to the candidateSeq.aligned sequence;
+                               
+                               candAln += candPair[pairwiseAlignIndex];
+                               newTemplateAlign += tempAln[fullAlignIndex];//
+                               fullAlignIndex++;                       
+                               pairwiseAlignIndex++;
+                       }
+                       else if(!isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
+                                       && isalpha(candPair[pairwiseAlignIndex])){
+                               //      the template pairwise and aligned are gaps while the candidate pairwise has a character
+                               //      this would be an insertion, go ahead and add the character->seems to be the opposite of the alpha scenario
+                               
+                               candAln += candPair[pairwiseAlignIndex];
+                               newTemplateAlign += tempAln[fullAlignIndex];//
+                               pairwiseAlignIndex++;
+                               fullAlignIndex++;                       
+                       }
+                       else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
+                                       && !isalpha(candPair[pairwiseAlignIndex])){
+                               //      template pairwise has a character, but its full aligned sequence and candidate sequence have gaps
+                               //      this would happen like we need to add a gap.  basically the opposite of the alpha situation
+                               
+                               newTemplateAlign += tempAln[fullAlignIndex];//
+                               candAln += "-";
+                               fullAlignIndex++;                       
+                       }
+                       else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
+                                       && !isalpha(candPair[pairwiseAlignIndex])){
+                               //      template and candidate pairwise are gaps and the template aligned is not a gap this should not be possible
+                               //      would skip the gaps and not progress through full alignment sequence
+                               //      not tested yet
+                               
+                               mothurOut("We're into D " + toString(fullAlignIndex) + " " +  toString(pairwiseAlignIndex)); mothurOutEndLine();
+                               pairwiseAlignIndex++;
+                       }
+                       else{
+                               //      everything has a gap - not possible
+                               //      not tested yet
+                               
+                               mothurOut("We're into F " +  toString(fullAlignIndex) + " " +  toString(pairwiseAlignIndex)); mothurOutEndLine();
+                               pairwiseAlignIndex++;
+                               fullAlignIndex++;                       
+                       }               
                }
-               else if(!isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
-                               && isalpha(candPair[pairwiseAlignIndex])){
-                       //      the template pairwise and aligned are gaps while the candidate pairwise has a character
-                       //      this would be an insertion, go ahead and add the character->seems to be the opposite of the alpha scenario
-                       
-                       candAln += candPair[pairwiseAlignIndex];
-                       newTemplateAlign += tempAln[fullAlignIndex];//
-                       pairwiseAlignIndex++;
-                       fullAlignIndex++;                       
+               
+               for(int i=fullAlignIndex;i<fullAlignLength;i++){
+                       candAln += '.';
+                       newTemplateAlign += tempAln[i];//
                }
-               else if(isalpha(tempPair[pairwiseAlignIndex]) && !isalpha(tempAln[fullAlignIndex])
-                               && !isalpha(candPair[pairwiseAlignIndex])){
-                       //      template pairwise has a character, but its full aligned sequence and candidate sequence have gaps
-                       //      this would happen like we need to add a gap.  basically the opposite of the alpha situation
-                       
-                       newTemplateAlign += tempAln[fullAlignIndex];//
-                       candAln += "-";
-                       fullAlignIndex++;                       
+               
+               int start, end;
+               for(int i=0;i<candAln.length();i++){
+                       if(candAln[i] == 'Z' || !isalnum(candAln[i]))   {       candAln[i] = '.';       }       //      if we padded the alignemnt from
+                       else{                   start = i;                      break;          }                                                       //      blast with Z's, change them to
+               }                                                                                                                                                               //      '.' characters
+               
+               for(int i=candAln.length()-1;i>=0;i--){                                                                                 //      ditto.
+                       if(candAln[i] == 'Z' || !isalnum(candAln[i]))   {       candAln[i] = '.';       }
+                       else{                   end = i;                        break;          }
                }
-               else if(!isalpha(tempPair[pairwiseAlignIndex]) && isalpha(tempAln[fullAlignIndex])
-                               && !isalpha(candPair[pairwiseAlignIndex])){
-                       //      template and candidate pairwise are gaps and the template aligned is not a gap this should not be possible
-                       //      would skip the gaps and not progress through full alignment sequence
-                       //      not tested yet
-                       
-                       mothurOut("We're into D " + toString(fullAlignIndex) + " " +  toString(pairwiseAlignIndex)); mothurOutEndLine();
-                       pairwiseAlignIndex++;
+               
+               for(int i=start;i<=end;i++){                                    //      go through the candidate alignment sequence and make sure that
+                       candAln[i] = toupper(candAln[i]);                       //      everything is upper case
                }
-               else{
-                       //      everything has a gap - not possible
-                       //      not tested yet
-                       
-                       mothurOut("We're into F " +  toString(fullAlignIndex) + " " +  toString(pairwiseAlignIndex)); mothurOutEndLine();
-                       pairwiseAlignIndex++;
-                       fullAlignIndex++;                       
-               }               
-       }
-       
-       for(int i=fullAlignIndex;i<fullAlignLength;i++){
-               candAln += '.';
-               newTemplateAlign += tempAln[i];//
-       }
-       
-       int start, end;
-       for(int i=0;i<candAln.length();i++){
-               if(candAln[i] == 'Z' || !isalnum(candAln[i]))   {       candAln[i] = '.';       }       //      if we padded the alignemnt from
-               else{                   start = i;                      break;          }                                                       //      blast with Z's, change them to
-       }                                                                                                                                                               //      '.' characters
-       
-       for(int i=candAln.length()-1;i>=0;i--){                                                                                 //      ditto.
-               if(candAln[i] == 'Z' || !isalnum(candAln[i]))   {       candAln[i] = '.';       }
-               else{                   end = i;                        break;          }
-       }
-       
-       for(int i=start;i<=end;i++){                                    //      go through the candidate alignment sequence and make sure that
-               candAln[i] = toupper(candAln[i]);                       //      everything is upper case
+               
+               
+               if(candAln.length() != tempAln.length()){               //      if the regapped candidate sequence is longer than the official
+                       removeExtraGaps(candAln, tempAln, newTemplateAlign);//  template alignment then we need to do steps C-F in Fig.
+               }                                                                                               //      2 of Desantis et al.
+               
+               
+               candidateSeq->setAligned(candAln);
        }
-
-
-       if(candAln.length() != tempAln.length()){               //      if the regapped candidate sequence is longer than the official
-               removeExtraGaps(candAln, tempAln, newTemplateAlign);//  template alignment then we need to do steps C-F in Fig.
-       }                                                                                               //      2 of Desantis et al.
-       
-
-       candidateSeq->setAligned(candAln);
+       catch(exception& e) {
+               errorOut(e, "Nast", "regapSequences");
+               exit(1);
+       }       
 }
 
 /**************************************************************************************************/
 
 float Nast::getSimilarityScore(){
-
-       string cand = candidateSeq->getAligned();
-       string temp = templateSeq->getAligned();
-       int alignmentLength = temp.length();
-       int mismatch = 0;
-       int denominator = 0;
+       try {
        
-       for(int i=0;i<alignmentLength;i++){
-               if(cand[i] == '-' && temp[i] == '-'){
-                       
-               }
-               else if(cand[i] != '.' && temp[i] != '.'){
-                       denominator++;
-                       
-                       if(cand[i] != temp[i]){
-                               mismatch++;
+               string cand = candidateSeq->getAligned();
+               string temp = templateSeq->getAligned();
+               int alignmentLength = temp.length();
+               int mismatch = 0;
+               int denominator = 0;
+               
+               for(int i=0;i<alignmentLength;i++){
+                       if(cand[i] == '-' && temp[i] == '-'){
+                               
+                       }
+                       else if(cand[i] != '.' && temp[i] != '.'){
+                               denominator++;
+                               
+                               if(cand[i] != temp[i]){
+                                       mismatch++;
+                               }
                        }
                }
+               float similarity = 100 * (1. - mismatch / (float)denominator);
+               if(denominator == 0){   similarity = 0.0000;    }
+               
+               return similarity;
+               
        }
-       float similarity = 100 * (1. - mismatch / (float)denominator);
-       if(denominator == 0){   similarity = 0.0000;    }
-
-       return similarity;
+       catch(exception& e) {
+               errorOut(e, "Nast", "getSimilarityScore");
+               exit(1);
+       }       
 }
 
 /**************************************************************************************************/
index effe0d310d99958cd5e97a09b494353d902e548c..b14d8e3d10e42ee9c7a21f5595315b29ad172ff5 100644 (file)
 
 NeedlemanOverlap::NeedlemanOverlap(float gO, float m, float mm, int r) ://     note that we don't have a gap extend
 gap(gO), match(m), mismatch(mm), Alignment(r) {                                                        //      the gap openning penalty is assessed for
-                                                                                                                                               //      every gapped position
-       for(int i=1;i<nCols;i++){
-               alignment[0][i].prevCell = 'l';                                 //      initialize first row by pointing all poiters to the left
-               alignment[0][i].cValue = 0;                                             //      and the score to zero
-       }
+       try {                                                                                                                                   //      every gapped position
+               for(int i=1;i<nCols;i++){
+                       alignment[0][i].prevCell = 'l';                                 //      initialize first row by pointing all poiters to the left
+                       alignment[0][i].cValue = 0;                                             //      and the score to zero
+               }
+               
+               for(int i=1;i<nRows;i++){
+                       alignment[i][0].prevCell = 'u';                                 //      initialize first column by pointing all poiters upwards
+                       alignment[i][0].cValue = 0;                                             //      and the score to zero
+               }
        
-       for(int i=1;i<nRows;i++){
-               alignment[i][0].prevCell = 'u';                                 //      initialize first column by pointing all poiters upwards
-               alignment[i][0].cValue = 0;                                             //      and the score to zero
        }
+       catch(exception& e) {
+               errorOut(e, "NeedlemanOverlap", "NeedlemanOverlap");
+               exit(1);
+       }
+                                                                                                                               
+                                                                                                                                               
 }
-
 /**************************************************************************************************/
 
 NeedlemanOverlap::~NeedlemanOverlap(){ /*      do nothing      */      }
@@ -45,45 +52,54 @@ NeedlemanOverlap::~NeedlemanOverlap(){      /*      do nothing      */      }
 /**************************************************************************************************/
 
 void NeedlemanOverlap::align(string A, string B){
-       
-       seqA = ' ' + A; lA = seqA.length();             //      algorithm requires a dummy space at the beginning of each string
-       seqB = ' ' + B; lB = seqB.length();             //      algorithm requires a dummy space at the beginning of each string
-       
-       for(int i=1;i<lB;i++){                                  //      This code was largely translated from Perl code provided in Ex 3.1 
-               for(int j=1;j<lA;j++){                          //      of the O'Reilly BLAST book.  I found that the example output had a
-                                                                                       //      number of errors
-                       float diagonal;
-                       if(seqB[i] == seqA[j])  {       diagonal = alignment[i-1][j-1].cValue + match;          }
-                       else                                    {       diagonal = alignment[i-1][j-1].cValue + mismatch;       }
-                       
-                       float up        = alignment[i-1][j].cValue + gap;
-                       float left      = alignment[i][j-1].cValue + gap;
-                       
-                       if(diagonal >= up){
-                               if(diagonal >= left){
-                                       alignment[i][j].cValue = diagonal;
-                                       alignment[i][j].prevCell = 'd';
-                               }
-                               else{
-                                       alignment[i][j].cValue = left;
-                                       alignment[i][j].prevCell = 'l';
-                               }
-                       }
-                       else{
-                               if(up >= left){
-                                       alignment[i][j].cValue = up;
-                                       alignment[i][j].prevCell = 'u';
+       try {
+               seqA = ' ' + A; lA = seqA.length();             //      algorithm requires a dummy space at the beginning of each string
+               seqB = ' ' + B; lB = seqB.length();             //      algorithm requires a dummy space at the beginning of each string
+
+               if (lA > nRows) { mothurOut("Your one of your candidate sequences is longer than you longest template sequence."); mothurOutEndLine();  }
+               
+               for(int i=1;i<lB;i++){                                  //      This code was largely translated from Perl code provided in Ex 3.1 
+                       for(int j=1;j<lA;j++){                          //      of the O'Reilly BLAST book.  I found that the example output had a
+                               //      number of errors
+                               float diagonal;
+                               if(seqB[i] == seqA[j])  {       diagonal = alignment[i-1][j-1].cValue + match;          }
+                               else                                    {       diagonal = alignment[i-1][j-1].cValue + mismatch;       }
+                               
+                               float up        = alignment[i-1][j].cValue + gap;
+                               float left      = alignment[i][j-1].cValue + gap;
+                               
+                               if(diagonal >= up){
+                                       if(diagonal >= left){
+                                               alignment[i][j].cValue = diagonal;
+                                               alignment[i][j].prevCell = 'd';
+                                       }
+                                       else{
+                                               alignment[i][j].cValue = left;
+                                               alignment[i][j].prevCell = 'l';
+                                       }
                                }
                                else{
-                                       alignment[i][j].cValue = left;
-                                       alignment[i][j].prevCell = 'l';
+                                       if(up >= left){
+                                               alignment[i][j].cValue = up;
+                                               alignment[i][j].prevCell = 'u';
+                                       }
+                                       else{
+                                               alignment[i][j].cValue = left;
+                                               alignment[i][j].prevCell = 'l';
+                                       }
                                }
                        }
                }
+               Overlap over;                                           
+               over.setOverlap(alignment, lA, lB, 0);          //      Fix gaps at the beginning and end of the sequences
+               traceBack();                                                            //      Traceback the alignment to populate seqAaln and seqBaln
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "NeedlemanOverlap", "align");
+               exit(1);
        }
-       Overlap over;                                           
-       over.setOverlap(alignment, lA, lB, 0);          //      Fix gaps at the beginning and end of the sequences
-       traceBack();                                                            //      Traceback the alignment to populate seqAaln and seqBaln
+
 }
 
 /**************************************************************************************************/
index 4b812a7c628d2816a0d8215cf0b80862b5471626..5f4abd0d15d440cbee9ab4f2f15bbb1aa524c025 100644 (file)
@@ -8,19 +8,11 @@
  */
 
 #include "pintail.h"
-#include "ignoregaps.h"
+#include "eachgapdist.h"
 
 //***************************************************************************************************************
 
-Pintail::Pintail(string name) {
-       try {
-               fastafile = name;
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "Pintail");
-               exit(1);
-       }
-}
+Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
 //***************************************************************************************************************
 
 Pintail::~Pintail() {
@@ -37,24 +29,20 @@ Pintail::~Pintail() {
 void Pintail::print(ostream& out) {
        try {
                
-               for (itCoef = DE.begin(); itCoef != DE.end(); itCoef++) {
+               for (int i = 0; i < querySeqs.size(); i++) {
                        
-                       out << itCoef->first->getName() << '\t' << itCoef->second << endl;
+                       out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << endl;
                        out << "Observed\t";
                        
-                       itObsDist = obsDistance.find(itCoef->first);
-                       for (int i = 0; i < itObsDist->second.size(); i++) {  out << itObsDist->second[i] << '\t';  }
+                       for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
                        out << endl;
                        
                        out << "Expected\t";
                        
-                       itExpDist = expectedDistance.find(itCoef->first);
-                       for (int i = 0; i < itExpDist->second.size(); i++) {  out << itExpDist->second[i] << '\t';  }
+                       for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
                        out << endl;
                        
                }
-               
-               
        }
        catch(exception& e) {
                errorOut(e, "Pintail", "print");
@@ -66,8 +54,6 @@ void Pintail::print(ostream& out) {
 void Pintail::getChimeras() {
        try {
                
-               distCalculator = new ignoreGaps();
-               
                //read in query sequences and subject sequences
                mothurOut("Reading sequences and template file... "); cout.flush();
                querySeqs = readSeqs(fastafile);
@@ -76,71 +62,109 @@ void Pintail::getChimeras() {
                
                int numSeqs = querySeqs.size();
                
-               //if window is set to default
-               if (window == 0) {  if (querySeqs[0]->getAligned().length() > 800) {  setWindow(200); }
-                                                       else{  setWindow((querySeqs[0]->getAligned().length() / 4)); }  } 
-               else if (window > (querySeqs[0]->getAligned().length() / 4)) { 
-                               mothurOut("You have selected to large a window size for you sequences.  I will choose a smaller window."); mothurOutEndLine();
-                               setWindow((querySeqs[0]->getAligned().length() / 4)); 
-               }
-       
-               //calculate number of iters 
-               iters = (querySeqs[0]->getAligned().length() - window + 1) / increment;
-cout << "length = " << querySeqs[0]->getAligned().length() << " window = " << window << " increment = " << increment << " iters = " << iters << endl;                  
+               obsDistance.resize(numSeqs);
+               expectedDistance.resize(numSeqs);
+               seqCoef.resize(numSeqs);
+               DE.resize(numSeqs);
+               Qav.resize(numSeqs);
+               bestfit.resize(numSeqs);
+               trim.resize(numSeqs);
+               deviation.resize(numSeqs);
+               windowSizes.resize(numSeqs, window);
+               
+               //break up file if needed
                int linesPerProcess = processors / numSeqs;
                
-               //find breakup of sequences for all times we will Parallelize
-               if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
-               else {
-                       //fill line pairs
-                       for (int i = 0; i < (processors-1); i++) {                      
-                               lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       //find breakup of sequences for all times we will Parallelize
+                       if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
+                       else {
+                               //fill line pairs
+                               for (int i = 0; i < (processors-1); i++) {                      
+                                       lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
+                               }
+                               //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
+                               int i = processors - 1;
+                               lines.push_back(new linePair((i*linesPerProcess), numSeqs));
                        }
-                       //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
-                       int i = processors - 1;
-                       lines.push_back(new linePair((i*linesPerProcess), numSeqs));
-               }
-               
-               //map query sequences to their most similiar sequences in the template - Parallelized
-               mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
-               if (processors == 1) {   findPairs(lines[0]->start, lines[0]->end);  } 
-               else {          createProcessesPairs();         }
-               mothurOut("Done."); mothurOutEndLine();
-               
-               //find Oqs for each sequence - the observed distance in each window - Parallelized
-               mothurOut("Calculating observed percentage differences for each sequence... "); cout.flush();
-               if (processors == 1) {   calcObserved(lines[0]->start, lines[0]->end);  } 
-               else {          createProcessesObserved();              }
-               mothurOut("Done."); mothurOutEndLine();
-               
+               #else
+                       lines.push_back(new linePair(0, numSeqs));
+               #endif
+               
+               distcalculator = new eachGapDist();
+               
+               if (processors == 1) { 
+                       mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
+                       bestfit = findPairs(lines[0]->start, lines[0]->end);
+for (int m = 0; m < templateSeqs.size(); m++)  {
+       if (templateSeqs[m]->getName() == "198806") {  bestfit[0] = *(templateSeqs[m]); }
+       if (templateSeqs[m]->getName() == "198806") {  bestfit[1] = *(templateSeqs[m]); }
+       if (templateSeqs[m]->getName() == "108139") {  bestfit[2] = *(templateSeqs[m]); }
+}
+                       
+for (int j = 0; j < bestfit.size(); j++) {//cout << querySeqs[j]->getName() << '\t' << "length = " <<  querySeqs[j]->getAligned().length() << '\t' << bestfit[j].getName() << " length = " <<  bestfit[j].getAligned().length() <<  endl; 
+                               //chops off beginning and end of sequences so they both start and end with a base
+                               trimSeqs(querySeqs[j], bestfit[j], j);  
+//cout << "NEW SEQ PAIR" << querySeqs[j]->getAligned() << endl << "IN THE MIDDLE" <<  endl << bestfit[j].getAligned() << endl; 
+
+}
+
+                       mothurOut("Done."); mothurOutEndLine();
+
+                       windows = findWindows(lines[0]->start, lines[0]->end);
+               } else {                createProcessesSpots();         }
+
                //find P
-               mothurOut("Calculating expected percentage differences for each sequence... "); cout.flush();
-               vector<float> probabilityProfile = calcFreq(templateSeqs);
+               if (consfile == "") {   probabilityProfile = calcFreq(templateSeqs);  }
+               else                            {   probabilityProfile = readFreq();                      }
                
                //make P into Q
                for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];      }
-
-               //find Qav
-               averageProbability = findQav(probabilityProfile);
-       
-               //find Coefficient - maps a sequence to its coefficient
-               seqCoef = getCoef(averageProbability);
-       
-               //find Eqs for each sequence - the expected distance in each window - Parallelized
-               if (processors == 1) {   calcExpected(lines[0]->start, lines[0]->end);  } 
-               else {          createProcessesExpected();              }
-               mothurOut("Done."); mothurOutEndLine();
                
-               //find deviation - Parallelized
-               mothurOut("Finding deviation from expected... "); cout.flush();
-               if (processors == 1) {   calcDE(lines[0]->start, lines[0]->end);  } 
-               else {          createProcessesDE();            }
-               mothurOut("Done."); mothurOutEndLine();
+               if (processors == 1) { 
+                                               
+                       mothurOut("Calculating observed distance... "); cout.flush();
+                       obsDistance = calcObserved(lines[0]->start, lines[0]->end);
+                       mothurOut("Done."); mothurOutEndLine();
+                       
+                       mothurOut("Finding variability... "); cout.flush();
+                       Qav = findQav(lines[0]->start, lines[0]->end);
+for (int i = 0; i < Qav.size(); i++) {
+cout << querySeqs[i]->getName() << " = ";
+for (int u = 0; u < Qav[i].size();u++) {   cout << Qav[i][u] << '\t';  }
+cout << endl << endl;
+}
+
+
+                       mothurOut("Done."); mothurOutEndLine();
+                       
+                       mothurOut("Calculating alpha... "); cout.flush();
+                       seqCoef = getCoef(lines[0]->start, lines[0]->end);
+for (int i = 0; i < seqCoef.size(); i++) {
+cout << querySeqs[i]->getName() << " coef = " << seqCoef[i] << endl;
+}
+
+                       mothurOut("Done."); mothurOutEndLine();
+                       
+                       mothurOut("Calculating expected distance... "); cout.flush();
+                       expectedDistance = calcExpected(lines[0]->start, lines[0]->end);
+                       mothurOut("Done."); mothurOutEndLine();
+                       
+                       mothurOut("Finding deviation... "); cout.flush();
+                       DE = calcDE(lines[0]->start, lines[0]->end); 
+                       deviation = calcDist(lines[0]->start, lines[0]->end); 
+                       mothurOut("Done."); mothurOutEndLine();
+                       
+                       
+                       
+               } 
+               else {          createProcesses();              }
                
-                               
+               delete distcalculator;
+       
                //free memory
                for (int i = 0; i < lines.size(); i++)                  {       delete lines[i];                }
-               delete distCalculator;  
+                       
 
        }
        catch(exception& e) {
@@ -181,11 +205,83 @@ vector<Sequence*> Pintail::readSeqs(string file) {
        }
 }
 
+//***************************************************************************************************************
+//num is query's spot in querySeqs
+void Pintail::trimSeqs(Sequence* query, Sequence& subject, int num) {
+       try {
+       
+               string q = query->getAligned();
+               string s = subject.getAligned();
+       
+               int front = 0;
+               for (int i = 0; i < q.length(); i++) {
+                       if (isalpha(q[i]) && isalpha(s[i])) { front = i; break;  }
+               }
+               
+               q = q.substr(front, q.length());
+               s = s.substr(front, s.length());
+               
+               int back = 0;           
+               for (int i = q.length(); i >= 0; i--) {
+                       if (isalpha(q[i]) && isalpha(s[i])) { back = i; break;  }
+               }
+       
+               q = q.substr(0, back);
+               s = s.substr(0, back);
+
+               trim[num][front] = back;
+       
+               //save 
+               query->setAligned(q);
+               query->setUnaligned(q);
+               subject.setAligned(s);
+               subject.setUnaligned(s);
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "trimSeqs");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+vector<float> Pintail::readFreq() {
+       try {
+       
+               ifstream in;
+               openInputFile(consfile, in);
+               
+               vector<float> prob;
+               
+               //read in probabilities and store in vector
+               int pos; float num;
+               
+               while(!in.eof()){
+                       
+                       in >> pos >> num;
+                       
+                       prob.push_back(num);
+                       
+                       gobble(in);
+               }
+               
+               in.close();
+               return prob;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "readFreq");
+               exit(1);
+       }
+}
+
 //***************************************************************************************************************
 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
-void Pintail::findPairs(int start, int end) {
+vector<Sequence> Pintail::findPairs(int start, int end) {
        try {
                
+               vector<Sequence> seqsMatches;  seqsMatches.resize(end-start);
+               
                for(int i = start; i < end; i++){
                
                        float smallest = 10000.0;
@@ -195,94 +291,206 @@ void Pintail::findPairs(int start, int end) {
                                
                                Sequence temp = *(templateSeqs[j]);
                                
-                               distCalculator->calcDist(query, temp);
-                               float dist = distCalculator->getDist();
+                               distcalculator->calcDist(query, temp);
+                               float dist = distcalculator->getDist();
                                
                                if (dist < smallest) { 
-                               
-                                       bestfit[querySeqs[i]] = templateSeqs[j];
+                                       seqsMatches[i] = *(templateSeqs[j]);
                                        smallest = dist;
                                }
                        }
                }
                
+               return seqsMatches;
+       
        }
        catch(exception& e) {
                errorOut(e, "Pintail", "findPairs");
                exit(1);
        }
 }
+
 //***************************************************************************************************************
-void Pintail::calcObserved(int start, int end) {
+//find the window breaks for each sequence
+vector< vector<int> > Pintail::findWindows(int start, int end) {
        try {
+               
+               vector< vector<int> > win;  win.resize(end-start);
+               
+               //for each sequence
+               int count = 0;
+               for(int i = start; i < end; i++){
+                       
+                       //if window is set to default
+                       if (windowSizes[i] == 0) {  if (querySeqs[i]->getAligned().length() > 1200) {  windowSizes[i] = 300; }
+                                                       else{  windowSizes[i] = (querySeqs[i]->getAligned().length() / 4); }  } 
+                       else if (windowSizes[i] > (querySeqs[i]->getAligned().length() / 4)) { 
+                                       mothurOut("You have selected to large a window size for sequence " + querySeqs[i]->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
+                                       windowSizes[i] = (querySeqs[i]->getAligned().length() / 4); 
+                       }
        
-                                               
+       //cout << "length = " << querySeqs[i]->getAligned().length() << " window = " << windowSizes[i] << " increment = " << increment << endl;                 
+                               
+
+                       string seq = querySeqs[i]->getAligned();
+                       int numBases = querySeqs[i]->getUnaligned().length();
+                       int spot = 0;
+                       
+                       //find location of first base
+                       for (int j = 0; j < seq.length(); j++) {
+                               if (isalpha(seq[j])) { spot = j;  break;  }
+                       }
+                       
+                       //save start of seq
+                       win[count].push_back(spot);
+                       
+                       
+                       //move ahead increment bases at a time until all bases are in a window
+                       int countBases = 0;
+                       int totalBases = 0;  //used to eliminate window of blanks at end of sequence
+                       for (int m = spot; m < seq.length(); m++) {
+                               
+                               //count number of bases you see
+                               if (isalpha(seq[m])) { countBases++; totalBases++;  }
+                               
+                               //if you have seen enough bases to make a new window
+                               if (countBases >= increment) {
+                                       win[count].push_back(m);  //save spot in alignment
+                                       countBases = 0;                         //reset bases you've seen in this window
+                               }
+                               
+                               //no need to continue if all your bases are in a window
+                               if (totalBases == numBases) {   break;  }
+                       }
+                       
+                       count++;
+               }
+               
+               
+               
+               return win;
+       
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "findWindows");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+vector< vector<float> > Pintail::calcObserved(int start, int end) {
+       try {
+               
+               vector< vector<float> > temp;
+               temp.resize(end-start);
+               
+               int count = 0;
                for(int i = start; i < end; i++){
                
-                       itBest = bestfit.find(querySeqs[i]);
-                       Sequence* query;
-                       Sequence* subject;
+                       Sequence* query = querySeqs[i];
+                       Sequence subject = bestfit[i];
                
-                       if (itBest != bestfit.end()) {
-                               query = itBest->first;
-                               subject = itBest->second;
-                       }else{ mothurOut("Error in calcObserved"); mothurOutEndLine(); }
-//cout << query->getName() << '\t' << subject->getName() << endl;                      
-                       
                        int startpoint = 0; 
-                       for (int m = 0; m < iters; m++) {
+                       for (int m = 0; m < windows[i].size(); m++) {
 
-                               string seqFrag = query->getAligned().substr(startpoint, window);
-                               string seqFragsub = subject->getAligned().substr(startpoint, window);
+                               string seqFrag = query->getAligned().substr(windows[i][startpoint], windowSizes[i]);
+                               string seqFragsub = subject.getAligned().substr(windows[i][startpoint], windowSizes[i]);
                                                                
                                int diff = 0;
                 for (int b = 0; b < seqFrag.length(); b++) {
                   
-                    //if this is not a gap
-                    if ((isalpha(seqFrag[b])) && (isalpha(seqFragsub[b]))) {
+                    //if either the query or subject is not a gap 
+                    if ((isalpha(seqFrag[b])) || (isalpha(seqFragsub[b]))) {
                         //and they are different - penalize
                         if (seqFrag[b] != seqFragsub[b]) { diff++; }
                     }
                 }
                
                 //percentage of mismatched bases
-                float dist = diff / (float)seqFrag.length();       
+                               float dist;
+                dist = diff / (float) seqFrag.length() * 100;       
                                
-                               obsDistance[query].push_back(dist);
+                               temp[count].push_back(dist);
                                
-                               startpoint += increment;
+                               startpoint++;
                        }
+                       
+                       count++;
                }
                
+               return temp;
        }
        catch(exception& e) {
                errorOut(e, "Pintail", "calcObserved");
                exit(1);
        }
 }
+//***************************************************************************************************************
+vector<float>  Pintail::calcDist(int start, int end) {
+       try {
+               
+               vector<float> temp;
+                               
+               for(int i = start; i < end; i++){
+               
+                       Sequence* query = querySeqs[i];
+                       Sequence subject = bestfit[i];
+                       
+                       string seqFrag = query->getAligned();
+                       string seqFragsub = subject.getAligned();
+                                                                                                               
+                       int diff = 0;
+                       for (int b = 0; b < seqFrag.length(); b++) {
+                  
+                               //if either the query or subject is not a gap 
+                               if ((isalpha(seqFrag[b])) || (isalpha(seqFragsub[b]))) {
+                                       //and they are different - penalize
+                                       if (seqFrag[b] != seqFragsub[b]) { diff++; }
+                               }
+                       }
+               
+                       //percentage of mismatched bases
+                       float dist;
+                       dist = diff / (float) seqFrag.length() * 100;       
+                               
+                       temp.push_back(dist);
+               }
+               
+               return temp;
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "calcDist");
+               exit(1);
+       }
+}
 
 //***************************************************************************************************************
-void Pintail::calcExpected(int start, int end) {
+vector< vector<float> > Pintail::calcExpected(int start, int end) {
        try {
                
-       
+               vector< vector<float> > temp; temp.resize(end-start);
+               
                //for each sequence
+               int count = 0;
                for(int i = start; i < end; i++){
                        
-                       itCoef = seqCoef.find(querySeqs[i]);
-                       float coef = itCoef->second;
+                       float coef = seqCoef[i];
                        
                        //for each window
                        vector<float> queryExpected;
-                       for (int m = 0; m < iters; m++) {               
-                               float expected = averageProbability[m] * coef;
+                       for (int m = 0; m < windows[i].size(); m++) {           
+                               float expected = Qav[i][m] * coef;
                                queryExpected.push_back(expected);      
 //cout << "average variabilty over window = " << averageProbability[m] << " coef = " << coef << " ei = "  << expected << '\t' <<  "window = " << m << endl;
                        }
                        
-                       expectedDistance[querySeqs[i]] = queryExpected;
+                       temp[count] = queryExpected;
+                       
+                       count++;
                        
                }
+               
+               return temp;
                                
        }
        catch(exception& e) {
@@ -291,28 +499,30 @@ void Pintail::calcExpected(int start, int end) {
        }
 }
 //***************************************************************************************************************
-void Pintail::calcDE(int start, int end) {
+vector<float> Pintail::calcDE(int start, int end) {
        try {
                
+               vector<float> temp; temp.resize(end-start);
        
                //for each sequence
+               int count = 0;
                for(int i = start; i < end; i++){
                        
-                       itObsDist = obsDistance.find(querySeqs[i]);
-                       vector<float> obs = itObsDist->second;
+                       vector<float> obs = obsDistance[i];
+                       vector<float> exp = expectedDistance[i];
                        
-                       itExpDist = expectedDistance.find(querySeqs[i]);
-                       vector<float> exp = itExpDist->second;
 //     cout << "difference between obs and exp = " << abs(obs[m] - exp[m]) << endl;    
                        //for each window
                        float sum = 0.0;  //sum = sum from 1 to m of (oi-ei)^2
-                       for (int m = 0; m < iters; m++) {               sum += ((obs[m] - exp[m]) * (obs[m] - exp[m]));         }
+                       for (int m = 0; m < windows[i].size(); m++) {           sum += ((obs[m] - exp[m]) * (obs[m] - exp[m]));         }
                        
-                       float de = sqrt((sum / (iters - 1)));
+                       float de = sqrt((sum / (windows[i].size() - 1)));
                        
-                       DE[querySeqs[i]] = de;
+                       temp[count] = de;
+                       count++;
                }
-                               
+               
+               return temp;
        }
        catch(exception& e) {
                errorOut(e, "Pintail", "calcDE");
@@ -326,6 +536,10 @@ vector<float> Pintail::calcFreq(vector<Sequence*> seqs) {
        try {
 
                vector<float> prob;
+               string freqfile = getRootName(templateFile) + "probability";
+               ofstream outFreq;
+               
+               openOutputFile(freqfile, outFreq);
                
                //at each position in the sequence
                for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
@@ -347,19 +561,25 @@ vector<float> Pintail::calcFreq(vector<Sequence*> seqs) {
                        
                        //find base with highest frequency
                        int highest = 0;
-                       for (int m = 0; m < freq.size(); m++) {    if (freq[m] > highest) {  highest = freq[m];  }              }
-                       
-                       //add in gaps - so you can effectively "ignore them"
-                       highest += gaps;
+                       for (int m = 0; m < freq.size(); m++) {   if (freq[m] > highest) {  highest = freq[m];  }               }
                        
-                       float highFreq = highest / (float) seqs.size(); 
+                       float highFreq;
+                       //if ( (seqs.size() - gaps) == 0 ) {  highFreq = 1.0;  }                        
+                       //else { highFreq = highest / (float) (seqs.size() - gaps);      }
+                       highFreq = highest / (float) seqs.size();
+cout << i << '\t' << highFreq << endl;
                        
                        float Pi;
-                       Pi =  (highFreq - 0.25) / 0.75;  
+                       Pi =  (highFreq - 0.25) / 0.75; 
+                       
+                       //saves this for later
+                       outFreq << i << '\t' << Pi << endl;
                                
                        prob.push_back(Pi); 
                }
                
+               outFreq.close();
+               
                return prob;
                                
        }
@@ -369,23 +589,32 @@ vector<float> Pintail::calcFreq(vector<Sequence*> seqs) {
        }
 }
 //***************************************************************************************************************
-vector<float> Pintail::findQav(vector<float> prob) {
+vector< vector<float> > Pintail::findQav(int start, int end) {
        try {
-               vector<float> averages;
+               vector< vector<float> > averages; 
+               map<int, int>::iterator it;
                
-               //for each window find average
-               int startpoint = 0;
-               for (int m = 0; m < iters; m++) {
-                       
-                       float average = 0.0;
-                       for (int i = startpoint; i < (startpoint+window); i++) {   average += prob[i];  }
-                       
-                       average = average / window;
-//cout << average << endl;                     
-                       //save this windows average
-                       averages.push_back(average);
+               for(int i = start; i < end; i++){
                
-                       startpoint += increment;
+                       //for each window find average
+                       vector<float> temp;
+                       for (int m = 0; m < windows[i].size(); m++) {
+                               
+                               float average = 0.0;
+                               
+                               it = trim[i].begin();  //trim[i] is a map of where this sequence was trimmed
+                               
+                               //while you are in the window for this sequence
+                               for (int j = windows[i][m]+it->first; j < (windows[i][m]+windowSizes[i]); j++) {   average += probabilityProfile[j];    }
+                               
+                               average = average / windowSizes[i];
+       //cout << average << endl;                      
+                               //save this windows average
+                               temp.push_back(average);
+                       }
+                       
+                       //save this qav
+                       averages.push_back(temp);
                }
                
                return averages;
@@ -396,31 +625,34 @@ vector<float> Pintail::findQav(vector<float> prob) {
        }
 }
 //***************************************************************************************************************
-map<Sequence*, float> Pintail::getCoef(vector<float> prob) {
+vector<float> Pintail::getCoef(int start, int end) {
        try {
-               map<Sequence*, float> coefs;
+               vector<float> coefs;
+               coefs.resize(end-start);
                
-               //find average prob
-               float probAverage = 0.0;
-               for (int i = 0; i < prob.size(); i++) {   probAverage += prob[i];       }
-               probAverage = probAverage / (float) prob.size();
-cout << "(sum of ai) / m = " << probAverage << endl;           
                //find a coef for each sequence
-               map<Sequence*, vector<float> >::iterator it;
-               for (it = obsDistance.begin(); it != obsDistance.end(); it++) {
-                       
-                       vector<float> temp = it->second;
-                       Sequence* tempSeq = it->first;
+               int count = 0;
+               for(int i = start; i < end; i++){
+               
+                       //find average prob for this seqs windows
+                       float probAverage = 0.0;
+                       for (int j = 0; j < Qav[i].size(); j++) {   probAverage += Qav[i][j];   }
+                       probAverage = probAverage / (float) Qav[i].size();
+       cout << "(sum of ai) / m = " << probAverage << endl;            
+
+                       vector<float> temp = obsDistance[i];
                        
                        //find observed average 
                        float obsAverage = 0.0;
-                       for (int i = 0; i < temp.size(); i++) {   obsAverage += temp[i];        }
+                       for (int j = 0; j < temp.size(); j++) {   obsAverage += temp[j];        }
                        obsAverage = obsAverage / (float) temp.size();
-cout << tempSeq->getName() << '\t' << obsAverage << endl;                      
+cout << "(sum of oi) / m = " << obsAverage << endl;            
                        float coef = obsAverage / probAverage;
-cout  << tempSeq->getName() << '\t' << "coef = " << coef << endl;                      
+               
                        //save this sequences coefficient
-                       coefs[tempSeq] = coef;
+                       coefs[count] = coef;
+                       
+                       count++;
                }
                
                                                
@@ -432,13 +664,16 @@ cout  << tempSeq->getName() << '\t' << "coef = " << coef << endl;
        }
 }
 
+
 /**************************************************************************************************/
 
-void Pintail::createProcessesPairs() {
+void Pintail::createProcessesSpots() {
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
                vector<int> processIDS;
+               vector< vector<int> > win; win.resize(querySeqs.size());
+               vector< map <int, int> > t; t.resize(querySeqs.size());
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -448,7 +683,31 @@ void Pintail::createProcessesPairs() {
                                processIDS.push_back(pid);  
                                process++;
                        }else if (pid == 0){
-                               findPairs(lines[process]->start, lines[process]->end);
+                               
+                               vector<Sequence> tempbest;
+                               tempbest = findPairs(lines[process]->start, lines[process]->end);
+                               int count = 0;
+                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
+                                       bestfit[i] = tempbest[count];
+                                       
+                                       //chops off beginning and end of sequences so they both start and end with a base
+                                       trimSeqs(querySeqs[i], bestfit[i], i);
+                                       t[i] = trim[i];
+                                       
+                                       count++;
+                               }
+                               
+                               
+                               
+                               vector< vector<int> > temp = findWindows(lines[process]->start, lines[process]->end);
+                               
+                               //move into best
+                               count = 0;
+                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
+                                       win[i] = temp[count];
+                                       count++;
+                               }
+                               
                                exit(0);
                        }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
                }
@@ -459,62 +718,32 @@ void Pintail::createProcessesPairs() {
                        wait(&temp);
                }
                
+               windows = win;
+               trim = t;
 #else
-               findPairs(lines[0]->start, lines[0]->end);
+               windows = findWindows(lines[0]->start, lines[0]->end);
 
 #endif         
        }
        catch(exception& e) {
-               errorOut(e, "Pintail", "createProcessesPairs");
+               errorOut(e, "Pintail", "createProcessesSpots");
                exit(1);
        }
 }
 
+
 /**************************************************************************************************/
 
-void Pintail::createProcessesObserved() {
+void Pintail::createProcesses() {
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
                vector<int> processIDS;
                
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               calcObserved(lines[process]->start, lines[process]->end);
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
+               vector< vector<float> > exp;  exp.resize(querySeqs.size());
+               vector<float> de; de.resize(querySeqs.size());
+               vector< vector<float> > obs; obs.resize(querySeqs.size());
                
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-#else
-               calcObserved(lines[0]->start, lines[0]->end);
-
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "createProcessesObserved");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-
-void Pintail::createProcessesExpected() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -524,45 +753,48 @@ void Pintail::createProcessesExpected() {
                                processIDS.push_back(pid);  
                                process++;
                        }else if (pid == 0){
-                               calcExpected(lines[process]->start, lines[process]->end);
-                               exit(0);
-                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
-               }
-               
-               //force parent to wait until all the processes are done
-               for (int i=0;i<processors;i++) { 
-                       int temp = processIDS[i];
-                       wait(&temp);
-               }
-               
-#else
-               calcExpected(lines[0]->start, lines[0]->end);
+                               
+                               vector< vector<float> > temp;
+                               vector<float> tempde;
+                               int count = 0;
+                               
+                               
+                               temp = calcObserved(lines[process]->start, lines[process]->end);
+                               count = 0;
+                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
+                                       obs[i] = temp[count];
+                                       count++;
+                               }
 
-#endif         
-       }
-       catch(exception& e) {
-               errorOut(e, "Pintail", "createProcessesExpected");
-               exit(1);
-       }
-}
+                               temp = findQav(lines[process]->start, lines[process]->end);
+                               count = 0;
+                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
+                                       Qav[i] = temp[count];
+                                       count++;
+                               }
+                               
+                               tempde = getCoef(lines[process]->start, lines[process]->end);
+                               count = 0;
+                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
+                                       seqCoef[i] = tempde[count];
+                                       count++;
+                               }
+                               
+                               temp = calcExpected(lines[process]->start, lines[process]->end);
+                               count = 0;
+                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
+                                       exp[i] = temp[count];
+                                       count++;
+                               }
 
-/**************************************************************************************************/
+                               
+                               tempde = calcDE(lines[process]->start, lines[process]->end); 
+                               count = 0;
+                               for (int i = lines[process]->start; i < lines[process]->end; i++) {
+                                       de[i] = tempde[count];
+                                       count++;
+                               }
 
-void Pintail::createProcessesDE() {
-       try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 0;
-               vector<int> processIDS;
-               
-               //loop through and create all the processes you want
-               while (process != processors) {
-                       int pid = fork();
-                       
-                       if (pid > 0) {
-                               processIDS.push_back(pid);  
-                               process++;
-                       }else if (pid == 0){
-                               calcDE(lines[process]->start, lines[process]->end);
                                exit(0);
                        }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
                }
@@ -573,13 +805,22 @@ void Pintail::createProcessesDE() {
                        wait(&temp);
                }
                
+               obsDistance = obs;
+               expectedDistance = exp;
+               DE = de;
+               
 #else
-               calcDE(lines[0]->start, lines[0]->end);
+               bestfit = findPairs(lines[0]->start, lines[0]->end);
+               obsDistance = calcObserved(lines[0]->start, lines[0]->end);
+               Qav = findQav(lines[0]->start, lines[0]->end);
+               seqCoef = getCoef(lines[0]->start, lines[0]->end);
+               expectedDistance = calcExpected(lines[0]->start, lines[0]->end);
+               DE = calcDE(lines[0]->start, lines[0]->end); 
 
 #endif         
        }
        catch(exception& e) {
-               errorOut(e, "Pintail", "createProcessesDE");
+               errorOut(e, "Pintail", "createProcesses");
                exit(1);
        }
 }
index 7c0f81bca5659a0f3fa56357dd70fe5498570334..797c99e1543ace2df4a142015a17fe56eaf4f033 100644 (file)
--- a/pintail.h
+++ b/pintail.h
 class Pintail : public Chimera {
        
        public:
-               Pintail(string);        
+               Pintail(string, string);        
                ~Pintail();
                
                void getChimeras();
                void print(ostream&);
                
+               void setCons(string c) { consfile = c;  }
+               
                
        private:
        
@@ -37,41 +39,47 @@ class Pintail : public Chimera {
                        linePair(int i, int j) : start(i), end(j) {}
                };
 
-       
-               Dist* distCalculator;
-               string fastafile;
+               Dist* distcalculator;
                int iters;
+               string fastafile, templateFile, consfile;
                vector<linePair*> lines;
                vector<Sequence*> querySeqs;
                vector<Sequence*> templateSeqs;
                
-               map<Sequence*, Sequence*> bestfit;  //maps a query sequence to its most similiar sequence in the template
-               map<Sequence*, Sequence*>::iterator itBest;
+               vector<Sequence> bestfit;  //bestfit[0] matches queryseqs[0]...
                
-               map<Sequence*, vector<float> > obsDistance;  //maps a query sequence to its observed distance at each window
-               map<Sequence*, vector<float> > expectedDistance;  //maps a query sequence to its expected distance at each window
-               map<Sequence*, vector<float> >::iterator itObsDist;
-               map<Sequence*, vector<float> >::iterator itExpDist;
+               vector< vector<float> > obsDistance;  //obsDistance[0] is the vector of observed distances for queryseqs[0]... 
+               vector< vector<float> > expectedDistance;  //expectedDistance[0] is the vector of expected distances for queryseqs[0]... 
+               vector<float> deviation;  //deviation[0] is the percentage of mismatched pairs over the whole seq between querySeqs[0] and its best match.
+               vector< vector<int> > windows;  // windows[0] is a vector containing the starting spot in queryseqs[0] aligned sequence for each window.
+                                                                               //this is needed so you can move by bases and not just spots in the alignment
+               vector< map<int, int> >  trim;  //trim[0] is the start and end position of trimmed querySeqs[0].  Used to find the variability over each sequence window.
+                                                                               
+               vector<int> windowSizes;    //windowSizes[0] = window size of querySeqs[0]
                
-               vector<float> averageProbability;                       //Qav
-               map<Sequence*, float> seqCoef;                          //maps a sequence to its coefficient
-               map<Sequence*, float> DE;                                       //maps a sequence to its deviation
-               map<Sequence*, float>::iterator itCoef; 
+               vector< vector<float> > Qav;    //Qav[0] is the vector of average variablility for queryseqs[0]... 
+               vector<float>  seqCoef;                         //seqCoef[0] is the coeff for queryseqs[0]...
+               vector<float> DE;                                       //DE[0] is the deviaation for queryseqs[0]...
+               vector<float> probabilityProfile;
                
                vector<Sequence*> readSeqs(string);
-               vector<float> findQav(vector<float>);
+               void trimSeqs(Sequence*, Sequence&, int);
+               vector<float> readFreq();
+               vector< vector<float> > findQav(int, int);  
                vector<float> calcFreq(vector<Sequence*>);
-               map<Sequence*, float> getCoef(vector<float>);
+               vector<float> getCoef(int, int);
                
-               void findPairs(int, int);
-               void calcObserved(int, int);
-               void calcExpected(int, int);
-               void calcDE(int, int);
+               vector<Sequence> findPairs(int, int);
+               vector< vector<int> > findWindows(int, int);
+               vector< vector<float> > calcObserved(int, int);
+               vector< vector<float> > calcExpected(int, int);
+               vector<float> calcDE(int, int);
+               vector<float> calcDist(int, int);
        
-               void createProcessesPairs();
-               void createProcessesObserved();
-               void createProcessesExpected();
-               void createProcessesDE();
+               void createProcessesSpots();
+               void createProcesses();
+               
+               
                
 };
 
index 402e0d9f1911bb0b19d27fc0aeac661826c1683d..803e95368240b15e8bee33ff4b22842fa168519d 100644 (file)
@@ -29,7 +29,7 @@ EstOutput SharedChao1::getValues(vector<SharedRAbundVector*> shared){
                //create and initialize trees to 0.
                initialTree(numGroups); 
                
-               //loop through vectors calculating the f11, f1A, f2A, f1B, f2B, S12 values
+               
                for (int i = 0; i < shared[0]->size(); i++) {
                        //get bin values and calc shared 
                        bool sharedByAll = true;
@@ -50,7 +50,7 @@ EstOutput SharedChao1::getValues(vector<SharedRAbundVector*> shared){
                //calculate chao1, (numleaves-1) because numleaves contains the ++ values.
                bool bias;
                for(int i=0;i<numLeaves;i++){
-                       if (f2leaves[i]->lvalue == 0) { bias = true;}// break;}
+                       if ((f2leaves[i]->lvalue == 0) || (f2leaves[i]->rvalue == 0))  { bias = true;  }// break;}
                }
 
                if(bias){
@@ -60,6 +60,7 @@ EstOutput SharedChao1::getValues(vector<SharedRAbundVector*> shared){
                                if (i != (numLeaves-1)) {
                                        rightvalue = (float)(f1leaves[i]->rvalue * (f1leaves[i]->rvalue - 1)) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * (f2leaves[i]->rvalue + 1));
                                }else{
+                                       //add in sobs
                                        rightvalue = (float)(f1leaves[i]->rvalue);
                                }
                                Chao += leftvalue + rightvalue;
@@ -73,6 +74,7 @@ EstOutput SharedChao1::getValues(vector<SharedRAbundVector*> shared){
                                if (i != (numLeaves-1)) {
                                        rightvalue = (float)(f1leaves[i]->rvalue * f1leaves[i]->rvalue) / (float)((pow(2, (float)f2leaves[i]->rcoef)) * f2leaves[i]->rvalue);
                                }else{
+                                       //add in sobs
                                        rightvalue = (float)(f1leaves[i]->rvalue);
                                }
                                Chao += leftvalue + rightvalue;
@@ -84,7 +86,7 @@ EstOutput SharedChao1::getValues(vector<SharedRAbundVector*> shared){
                        delete f2leaves[i];
                }
                
-
+               
                data[0] = Chao;
                return data;
        }
index 217dd3594081703b6cf7a28857cd553b62bcfaec..7cb2134fb475b76284c6dcfd857dac5a67f9f4b1 100644 (file)
--- a/venn.cpp
+++ b/venn.cpp
@@ -133,9 +133,9 @@ void Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculator*> vCalcs
                                        singleCalc = new Sobs();
                                }else if (vCalcs[i]->getName() == "sharedchao") {
                                        singleCalc = new Chao1();
-                               }else if (vCalcs[i]->getName() == "sharedace") {
-                                       singleCalc = new Ace(10);
-                               }
+                               }//else if (vCalcs[i]->getName() == "sharedace") {
+                                       //singleCalc = new Ace(10);
+                               //}
                                
                                //get estimates for numA
                                vector<double> numA = singleCalc->getValues(sabundA);
@@ -190,154 +190,234 @@ void Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculator*> vCalcs
                        //make a file for each calculator
                        for(int i=0;i<vCalcs.size();i++){
                        
-                               //in essence you want to run it like a single 
-                               if (vCalcs[i]->getName() == "sharedsobs") {
-                                       singleCalc = new Sobs();
-                               }else if (vCalcs[i]->getName() == "sharedchao") {
-                                       singleCalc = new Chao1();
-                               }else if (vCalcs[i]->getName() == "sharedace") {
-                                       singleCalc = new Ace(10);
-                               }
+                                                                                               
+                               string filenamesvg = getRootName(globaldata->inputFileName) + lookup[0]->getLabel() + ".venn." + vCalcs[i]->getName() + ".svg";
+                               openOutputFile(filenamesvg, outsvg);
                                
-                               //get estimates for numA
-                               vector<double> numA = singleCalc->getValues(sabundA);
+                               if (vCalcs[i]->getName() == "sharedace") {
+                               
+                                       singleCalc = new Ace(10);
+                                       
+                                       //get estimates for numA
+                                       vector<double> numA = singleCalc->getValues(sabundA);
                        
-                               //get estimates for numB
-                               vector<double> numB = singleCalc->getValues(sabundB);
+                                       //get estimates for numB
+                                       vector<double> numB = singleCalc->getValues(sabundB);
                                
-                               //get estimates for numC
-                               vector<double> numC = singleCalc->getValues(sabundC);
+                                       //get estimates for numC
+                                       vector<double> numC = singleCalc->getValues(sabundC);
 
-                               
-                               string filenamesvg = getRootName(globaldata->inputFileName) + lookup[0]->getLabel() + ".venn." + vCalcs[i]->getName() + ".svg";
-                               openOutputFile(filenamesvg, outsvg);
 
-                               //get estimates for sharedAB, sharedAC and sharedBC
-                               subset.clear();
-                               subset.push_back(lookup[0]); subset.push_back(lookup[1]);
-                               vector<double> sharedAB = vCalcs[i]->getValues(subset);
-                               
-                               subset.clear();
-                               subset.push_back(lookup[0]); subset.push_back(lookup[2]);
-                               vector<double> sharedAC = vCalcs[i]->getValues(subset);
-                               
-                               subset.clear();
-                               subset.push_back(lookup[1]); subset.push_back(lookup[2]);
-                               vector<double> sharedBC = vCalcs[i]->getValues(subset);
-                               
-                               vector<double> sharedAwithBC;
-                               vector<double> sharedBwithAC;
-                               vector<double> sharedCwithAB;
-                               
-                               //find possible sharedABC values
-                               float sharedABC1 = 0.0; float sharedABC2 = 0.0; float sharedABC3 = 0.0; float sharedABC = 0.0;
+                                       //get estimates for sharedAB, sharedAC and sharedBC
+                                       subset.clear();
+                                       subset.push_back(lookup[0]); subset.push_back(lookup[1]);
+                                       vector<double> sharedAB = vCalcs[i]->getValues(subset);
+                                       
+                                       subset.clear();
+                                       subset.push_back(lookup[0]); subset.push_back(lookup[2]);
+                                       vector<double> sharedAC = vCalcs[i]->getValues(subset);
+                                       
+                                       subset.clear();
+                                       subset.push_back(lookup[1]); subset.push_back(lookup[2]);
+                                       vector<double> sharedBC = vCalcs[i]->getValues(subset);
+                                       
+                                       vector<double> sharedAwithBC;
+                                       vector<double> sharedBwithAC;
+                                       vector<double> sharedCwithAB;
+                                       
+                                       //find possible sharedABC values
+                                       float sharedABC1 = 0.0; float sharedABC2 = 0.0; float sharedABC3 = 0.0; float sharedABC = 0.0;
 
-                               if (vCalcs[i]->getMultiple() == false) {
-                                       //merge BC and estimate with shared with A
-                                       SharedRAbundVector* merge = new SharedRAbundVector();
-                                       for (int j = 0; j < lookup[1]->size(); j++) {
-                                               merge->push_back((lookup[1]->getAbundance(j) + lookup[2]->getAbundance(j)), j, "");
-                                       }
+                                       if (vCalcs[i]->getMultiple() == false) {
+                                               //merge BC and estimate with shared with A
+                                               SharedRAbundVector* merge = new SharedRAbundVector();
+                                               for (int j = 0; j < lookup[1]->size(); j++) {
+                                                       merge->push_back((lookup[1]->getAbundance(j) + lookup[2]->getAbundance(j)), j, "");
+                                               }
+                                       
+                                               subset.clear();
+                                               subset.push_back(lookup[0]); subset.push_back(merge);
+                                               sharedAwithBC = vCalcs[i]->getValues(subset);
                                
-                                       subset.clear();
-                                       subset.push_back(lookup[0]); subset.push_back(merge);
-                                       sharedAwithBC = vCalcs[i]->getValues(subset);
-                       
-                                       delete merge;
-                                       //merge AC and estimate with shared with B
-                                       merge = new SharedRAbundVector();
-                                       for (int j = 0; j < lookup[0]->size(); j++) {
-                                               merge->push_back((lookup[0]->getAbundance(j) + lookup[2]->getAbundance(j)), j, "");
-                                       }
+                                               delete merge;
+                                               //merge AC and estimate with shared with B
+                                               merge = new SharedRAbundVector();
+                                               for (int j = 0; j < lookup[0]->size(); j++) {
+                                                       merge->push_back((lookup[0]->getAbundance(j) + lookup[2]->getAbundance(j)), j, "");
+                                               }
+                                       
+                                               subset.clear();
+                                               subset.push_back(merge); subset.push_back(lookup[1]);
+                                               sharedBwithAC = vCalcs[i]->getValues(subset);
                                
-                                       subset.clear();
-                                       subset.push_back(merge); subset.push_back(lookup[1]);
-                                       sharedBwithAC = vCalcs[i]->getValues(subset);
+                                               delete merge;
+                                               //merge AB and estimate with shared with C
+                                               merge = new SharedRAbundVector();
+                                               for (int j = 0; j < lookup[0]->size(); j++) {
+                                                       merge->push_back((lookup[0]->getAbundance(j) + lookup[1]->getAbundance(j)), j, "");
+                                               }
+                                       
+                                               subset.clear();
+                                               subset.push_back(lookup[2]); subset.push_back(merge);
+                                               sharedCwithAB = vCalcs[i]->getValues(subset);
+                                               delete merge;
+                                       
+                                               sharedABC1 = sharedAB[0] + sharedAC[0] - sharedAwithBC[0];
+                                               sharedABC2 = sharedAB[0] + sharedBC[0] - sharedBwithAC[0];
+                                               sharedABC3 = sharedAC[0] + sharedBC[0] - sharedCwithAB[0];
+        
+                                               //if any of the possible m's are - throw them out
+                                               if (sharedABC1 < 0.00001) { sharedABC1 = 0; }
+                                               if (sharedABC2 < 0.00001) { sharedABC2 = 0; }
+                                               if (sharedABC3 < 0.00001) { sharedABC3 = 0; }
                        
-                                       delete merge;
-                                       //merge AB and estimate with shared with C
-                                       merge = new SharedRAbundVector();
-                                       for (int j = 0; j < lookup[0]->size(); j++) {
-                                               merge->push_back((lookup[0]->getAbundance(j) + lookup[1]->getAbundance(j)), j, "");
+                                               //sharedABC is the minimum of the 3 possibilities
+                                               if ((sharedABC1 < sharedABC2) && (sharedABC1 < sharedABC3)) { sharedABC = sharedABC1; }
+                                               else if ((sharedABC2 < sharedABC1) && (sharedABC2 < sharedABC3)) { sharedABC = sharedABC2; }
+                                               else if ((sharedABC3 < sharedABC1) && (sharedABC3 < sharedABC2)) { sharedABC = sharedABC3; }    
+                                       }else{
+                                               vector<double> data = vCalcs[i]->getValues(lookup);
+                                               sharedABC = data[0];
+                                               sharedAwithBC.push_back(sharedAB[0] + sharedAC[0] - sharedABC);
+                                               sharedBwithAC.push_back(sharedAB[0] + sharedBC[0] - sharedABC);
+                                               sharedCwithAB.push_back(sharedAC[0] + sharedBC[0] - sharedABC);
                                        }
+                                       
+                                       //image window
+                                       outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 800 800\" >\n";
+                                       outsvg << "<g>\n";
+
+                                       //draw circles
+                                       outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"800\" height=\"800\"/>"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
+                                       outsvg << "<circle fill=\"rgb(255,0,0)\" opacity=\".3\" stroke=\"black\" cx=\"230\" cy=\"200\" r=\"150\"/>"; 
+                                       outsvg << "<circle fill=\"rgb(0,255,0)\" opacity=\".3\" stroke=\"black\" cx=\"455\" cy=\"200\" r=\"150\"/>"; 
+                                       outsvg << "<circle fill=\"rgb(0,0,255)\" opacity=\".3\" stroke=\"black\" cx=\"343\" cy=\"400\" r=\"150\"/>"; 
+
+                                       //place labels within overlaps
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)toString(numA[0]-sharedAwithBC[0]).length() / 2)) + "\" y=\"170\">" + toString(numA[0]-sharedAwithBC[0]) + "</text>\n"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)lookup[0]->getGroup().length() / 2)) + "\" y=\"150\">" + lookup[0]->getGroup() + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedAB[0] - sharedABC).length() / 2)) + "\"  y=\"170\">" + toString(sharedAB[0] - sharedABC) + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)toString(numB[0]-sharedBwithAC[0]).length() / 2)) + "\"  y=\"170\">" + toString(numB[0]-sharedBwithAC[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)lookup[1]->getGroup().length() / 2)) + "\"  y=\"150\">" + lookup[1]->getGroup() + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(268 - ((int)toString(sharedAC[0] - sharedABC).length() / 2)) + "\"  y=\"305\">" + toString(sharedAC[0] - sharedABC) + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(343 - ((int)toString(numC[0]-sharedCwithAB[0]).length() / 2)) + "\"   y=\"430\">" + toString(numC[0]-sharedCwithAB[0]) + "</text>\n"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(343 - ((int)lookup[2]->getGroup().length() / 2)) + "\"   y=\"410\">" + lookup[2]->getGroup() + "</text>\n"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(408 - ((int)toString(sharedBC[0] - sharedABC).length() / 2)) + "\" y=\"305\">" + toString(sharedBC[0] - sharedABC) + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedABC).length() / 2)) + "\"  y=\"280\">" + toString(sharedABC) + "</text>\n"; 
                                
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"660\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"680\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"700\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"720\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and combined groups " + lookup[1]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedAwithBC[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"740\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedBwithAC[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"760\">The number of sepecies shared between groups " + lookup[2]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[1]->getGroup() + " is " + toString(sharedCwithAB[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"580\">The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]);
+                                       if (numA.size() == 3) { 
+                                               outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "</text>\n";
+                                       }else { outsvg << "</text>\n"; }
+                       
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"600\">The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]);
+                                       if (numB.size() == 3) { 
+                                               outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]) + "</text>\n";
+                                       }else { outsvg << "</text>\n"; }
+                                       
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"620\">The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]);
+                                       if (numC.size() == 3) { 
+                                               outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]) + "</text>\n";
+                                       }else { outsvg << "</text>\n"; }
+
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"640\">The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedAB[0] - sharedAC[0] - sharedBC[0] + sharedABC) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"780\">The total shared richness is " + toString(sharedABC) + "</text>\n";
+                                       
+                                       delete singleCalc;
+                                       
+                               }else { //sharedchao and sharedsobs are multigroup
+                                       
+                                       vector<SharedRAbundVector*> subset;
+
+                                       //get estimates for numA
+                                       subset.push_back(lookup[0]);
+                                       vector<double> numA = vCalcs[i]->getValues(subset);
+                       
+                                       //get estimates for numB
                                        subset.clear();
-                                       subset.push_back(lookup[2]); subset.push_back(merge);
-                                       sharedCwithAB = vCalcs[i]->getValues(subset);
-                                       delete merge;
-                               
-                                       sharedABC1 = sharedAB[0] + sharedAC[0] - sharedAwithBC[0];
-                                       sharedABC2 = sharedAB[0] + sharedBC[0] - sharedBwithAC[0];
-                                       sharedABC3 = sharedAC[0] + sharedBC[0] - sharedCwithAB[0];
-                                       //if any of the possible m's are - throw them out
-                                       if (sharedABC1 < 0.00001) { sharedABC1 = 0; }
-                                       if (sharedABC2 < 0.00001) { sharedABC2 = 0; }
-                                       if (sharedABC3 < 0.00001) { sharedABC3 = 0; }
-               
-                                       //sharedABC is the minimum of the 3 possibilities
-                                       if ((sharedABC1 < sharedABC2) && (sharedABC1 < sharedABC3)) { sharedABC = sharedABC1; }
-                                       else if ((sharedABC2 < sharedABC1) && (sharedABC2 < sharedABC3)) { sharedABC = sharedABC2; }
-                                       else if ((sharedABC3 < sharedABC1) && (sharedABC3 < sharedABC2)) { sharedABC = sharedABC3; }    
-                               }else{
-                                       vector<double> data = vCalcs[i]->getValues(lookup);
-                                       sharedABC = data[0];
-                                       sharedAwithBC.push_back(sharedAB[0] + sharedAC[0] - sharedABC);
-                                       sharedBwithAC.push_back(sharedAB[0] + sharedBC[0] - sharedABC);
-                                       sharedCwithAB.push_back(sharedAC[0] + sharedBC[0] - sharedABC);
-                               }
-               
-                               //image window
-                               outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 800 800\" >\n";
-                               outsvg << "<g>\n";
+                                       subset.push_back(lookup[1]);
+                                       vector<double> numB = vCalcs[i]->getValues(subset);
+                               
+                                       //get estimates for numC
+                                       subset.clear();
+                                       subset.push_back(lookup[2]);
+                                       vector<double> numC = vCalcs[i]->getValues(subset);
 
-                               //draw circles
-                               outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"800\" height=\"800\"/>"; 
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
-                               outsvg << "<circle fill=\"rgb(255,0,0)\" opacity=\".3\" stroke=\"black\" cx=\"230\" cy=\"200\" r=\"150\"/>"; 
-                               outsvg << "<circle fill=\"rgb(0,255,0)\" opacity=\".3\" stroke=\"black\" cx=\"455\" cy=\"200\" r=\"150\"/>"; 
-                               outsvg << "<circle fill=\"rgb(0,0,255)\" opacity=\".3\" stroke=\"black\" cx=\"343\" cy=\"400\" r=\"150\"/>"; 
+                                       subset.clear();
+                                       subset.push_back(lookup[0]); subset.push_back(lookup[1]);
+                                       vector<double> sharedab =  vCalcs[i]->getValues(subset);
+                                       
+                                       subset.clear(); 
+                                       subset.push_back(lookup[0]); subset.push_back(lookup[2]);
+                                       vector<double> sharedac =  vCalcs[i]->getValues(subset);
+                                       
+                                       subset.clear(); 
+                                       subset.push_back(lookup[1]); subset.push_back(lookup[2]);
+                                       vector<double> sharedbc =  vCalcs[i]->getValues(subset);
+                                       
+                                       subset.clear(); 
+                                       subset.push_back(lookup[0]); subset.push_back(lookup[1]); subset.push_back(lookup[2]);
+                                       vector<double> sharedabc =  vCalcs[i]->getValues(subset);
+                                       
+                                       //image window
+                                       outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 800 800\" >\n";
+                                       outsvg << "<g>\n";
 
-                               //place labels within overlaps
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)toString(numA[0]-sharedAwithBC[0]).length() / 2)) + "\" y=\"170\">" + toString(numA[0]-sharedAwithBC[0]) + "</text>\n"; 
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)lookup[0]->getGroup().length() / 2)) + "\" y=\"150\">" + lookup[0]->getGroup() + "</text>\n";  
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedAB[0] - sharedABC).length() / 2)) + "\"  y=\"170\">" + toString(sharedAB[0] - sharedABC) + "</text>\n";  
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)toString(numB[0]-sharedBwithAC[0]).length() / 2)) + "\"  y=\"170\">" + toString(numB[0]-sharedBwithAC[0]) + "</text>\n";
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)lookup[1]->getGroup().length() / 2)) + "\"  y=\"150\">" + lookup[1]->getGroup() + "</text>\n";  
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(268 - ((int)toString(sharedAC[0] - sharedABC).length() / 2)) + "\"  y=\"305\">" + toString(sharedAC[0] - sharedABC) + "</text>\n";  
-                               outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(343 - ((int)toString(numC[0]-sharedCwithAB[0]).length() / 2)) + "\"   y=\"430\">" + toString(numC[0]-sharedCwithAB[0]) + "</text>\n"; 
-                               outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(343 - ((int)lookup[2]->getGroup().length() / 2)) + "\"   y=\"410\">" + lookup[2]->getGroup() + "</text>\n"; 
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(408 - ((int)toString(sharedBC[0] - sharedABC).length() / 2)) + "\" y=\"305\">" + toString(sharedBC[0] - sharedABC) + "</text>\n";  
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedABC).length() / 2)) + "\"  y=\"280\">" + toString(sharedABC) + "</text>\n"; 
-                       
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"660\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB[0]) + "</text>\n";
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"680\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC[0]) + "</text>\n";
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"700\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC[0]) + "</text>\n";
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"720\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and combined groups " + lookup[1]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedAwithBC[0]) + "</text>\n";
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"740\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedBwithAC[0]) + "</text>\n";
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"760\">The number of sepecies shared between groups " + lookup[2]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[1]->getGroup() + " is " + toString(sharedCwithAB[0]) + "</text>\n";
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"580\">The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]);
-                               if (numA.size() == 3) { 
-                                       outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "</text>\n";
-                               }else { outsvg << "</text>\n"; }
-               
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"600\">The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]);
-                               if (numB.size() == 3) { 
-                                       outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]) + "</text>\n";
-                               }else { outsvg << "</text>\n"; }
-                               
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"620\">The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]);
-                               if (numC.size() == 3) { 
-                                       outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]) + "</text>\n";
-                               }else { outsvg << "</text>\n"; }
+                                       //draw circles
+                                       outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"800\" height=\"800\"/>"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
+                                       outsvg << "<circle fill=\"rgb(255,0,0)\" opacity=\".3\" stroke=\"black\" cx=\"230\" cy=\"200\" r=\"150\"/>"; 
+                                       outsvg << "<circle fill=\"rgb(0,255,0)\" opacity=\".3\" stroke=\"black\" cx=\"455\" cy=\"200\" r=\"150\"/>"; 
+                                       outsvg << "<circle fill=\"rgb(0,0,255)\" opacity=\".3\" stroke=\"black\" cx=\"343\" cy=\"400\" r=\"150\"/>"; 
 
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"640\">The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedAB[0] - sharedAC[0] - sharedBC[0] + sharedABC) + "</text>\n";
-                               outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"780\">The total shared richness is " + toString(sharedABC) + "</text>\n";
+                                       //place labels within overlaps
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)toString(numA[0]-sharedab[0]-sharedac[0]+sharedabc[0]).length() / 2)) + "\" y=\"170\">" + toString(numA[0]-sharedab[0]-sharedac[0]+sharedabc[0]) + "</text>\n"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(200 - ((int)lookup[0]->getGroup().length() / 2)) + "\" y=\"150\">" + lookup[0]->getGroup() + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedab[0] - sharedabc[0]).length() / 2)) + "\"  y=\"170\">" + toString(sharedab[0] - sharedabc[0]) + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)toString(numB[0]-sharedab[0]-sharedbc[0]+sharedabc[0]).length() / 2)) + "\"  y=\"170\">" + toString(numB[0]-sharedab[0]-sharedbc[0]+sharedabc[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(485 - ((int)lookup[1]->getGroup().length() / 2)) + "\"  y=\"150\">" + lookup[1]->getGroup() + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(268 - ((int)toString(sharedac[0] - sharedabc[0]).length() / 2)) + "\"  y=\"305\">" + toString(sharedac[0] - sharedabc[0]) + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(343 - ((int)toString(numC[0]-sharedac[0]-sharedbc[0]+sharedabc[0]).length() / 2)) + "\"   y=\"430\">" + toString(numC[0]-sharedac[0]-sharedbc[0]+sharedabc[0]) + "</text>\n"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(343 - ((int)lookup[2]->getGroup().length() / 2)) + "\"   y=\"410\">" + lookup[2]->getGroup() + "</text>\n"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(408 - ((int)toString(sharedbc[0] - sharedabc[0]).length() / 2)) + "\" y=\"305\">" + toString(sharedbc[0] - sharedabc[0]) + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(343 - ((int)toString(sharedabc[0]).length() / 2)) + "\"  y=\"280\">" + toString(sharedabc[0]) + "</text>\n"; 
                                
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"660\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedab[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"680\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedac[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"700\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedbc[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"580\">The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]);
+                                       if (numA.size() == 3) { 
+                                               outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "</text>\n";
+                                       }else { outsvg << "</text>\n"; }
+                       
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"600\">The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]);
+                                       if (numB.size() == 3) { 
+                                               outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]) + "</text>\n";
+                                       }else { outsvg << "</text>\n"; }
+                                       
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"620\">The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]);
+                                       if (numC.size() == 3) { 
+                                               outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]) + "</text>\n";
+                                       }else { outsvg << "</text>\n"; }
+
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"640\">The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedab[0] - sharedac[0] - sharedbc[0] + sharedabc[0]) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"720\">The total shared richness is " + toString(sharedabc[0]) + "</text>\n";
+
+
+                               }
+               
+                                                               
                                //close file
                                outsvg << "</g>\n</svg>\n";
                                outsvg.close();
-                               delete singleCalc;
+                               
 
                        }
                        
@@ -451,7 +531,33 @@ void Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculator*> vCalcs
                                        //get estimate for all four
                                        data = vCalcs[i]->getValues(lookup);
                                        sharedABCD = data[0];
-               //cout << "num abcd = " << sharedABCD << endl << endl;                  
+               //cout << "num abcd = " << sharedABCD << endl << endl;  
+               
+                               
+                                               
+                                       //image window
+                                       outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 700 800\" >\n";
+                                       outsvg << "<g>\n";
+                                       outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"700\" height=\"800\"/>"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
+
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"490\">The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"510\">The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"530\">The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"550\">The number of species in group " + lookup[3]->getGroup() + " is " + toString(numD) + "</text>\n";
+                                       
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"570\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"590\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"610\">The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedAD) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"630\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"650\">The number of sepecies shared between groups " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBD) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"670\">The number of sepecies shared between groups " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedCD) + "</text>\n";
+                                       
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"690\">The number of sepecies shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedABC) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"710\">The number of sepecies shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedABD) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"730\">The number of sepecies shared between groups " + lookup[0]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedACD) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"750\">The number of sepecies shared between groups " + lookup[1]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBCD) + "</text>\n";
+                                                                       
                                        //make adjustments
                                        sharedABC = sharedABC - sharedABCD;
                        //cout << "num abc = " << sharedABC << endl;            
@@ -462,13 +568,12 @@ void Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculator*> vCalcs
                                        sharedBCD = sharedBCD - sharedABCD;
                                //cout << "num bcd = " << sharedBCD << endl;
                                        
-                                       sharedAB = sharedAB - sharedABC - sharedABCD - sharedABD; // cout << "num ab = " << sharedAB << endl;
-                                       sharedAC = sharedAC - sharedABC - sharedABCD - sharedACD; // cout << "num ac = " << sharedAC << endl;
-                                       sharedAD = sharedAD - sharedABD - sharedABCD - sharedACD; // cout << "num ad = " << sharedAD << endl;
-                                       
-                                       sharedBC = sharedBC - sharedABC - sharedABCD - sharedBCD; // cout << "num bc = " << sharedBC << endl;
+                                       sharedAB = sharedAB - sharedABC - sharedABCD - sharedABD;  //cout << "num ab = " << sharedAB << endl;
+                                       sharedAC = sharedAC - sharedABC - sharedABCD - sharedACD;  //cout << "num ac = " << sharedAC << endl;
+                                       sharedAD = sharedAD - sharedABD - sharedABCD - sharedACD;  //cout << "num ad = " << sharedAD << endl;                           
+                                       sharedBC = sharedBC - sharedABC - sharedABCD - sharedBCD;  //cout << "num bc = " << sharedBC << endl;
                                        sharedBD = sharedBD - sharedABD - sharedABCD - sharedBCD; // cout << "num bd = " << sharedBD << endl; 
-                                       sharedCD = sharedCD - sharedACD - sharedABCD - sharedBCD; // cout << "num cd = " << sharedCD << endl;
+                                       sharedCD = sharedCD - sharedACD - sharedABCD - sharedBCD;  //cout << "num cd = " << sharedCD << endl;
                                        
                                        numA = numA - sharedAB - sharedAC - sharedAD - sharedABCD - sharedABC - sharedACD - sharedABD;
                        //cout << "num a = " << numA << endl;           
@@ -479,13 +584,7 @@ void Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculator*> vCalcs
                                        numD = numD - sharedAD - sharedBD - sharedCD - sharedABCD - sharedBCD - sharedACD - sharedABD;
                        //cout << "num d = " << numD << endl;           
                                        
-                                       //image window
-                                       outsvg << "<svg width=\"100%\" height=\"100%\" viewBox=\"0 0 700 700\" >\n";
-                                       outsvg << "<g>\n";
-
                                        //draw circles
-                                       outsvg << "<rect fill=\"white\" stroke=\"white\" x=\"0\" y=\"0\" width=\"700\" height=\"700\"/>"; 
-                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"265\" y=\"30\">Venn Diagram at distance " + lookup[0]->getLabel() + "</text>\n";
                                        outsvg << "<ellipse fill=\"red\" stroke=\"black\" opacity=\".35\" transform=\"rotate(-45 355 215) \" cx=\"355\" cy=\"215\" rx=\"200\" ry=\"115\"/>\n "; 
                                        outsvg << "<ellipse fill=\"green\" stroke=\"black\" opacity=\".35\" transform=\"rotate(+45 355 215) \" cx=\"355\" cy=\"215\" rx=\"200\" ry=\"115\"/>\n ";
                                        outsvg << "<ellipse fill=\"blue\" stroke=\"black\" opacity=\".35\" transform=\"rotate(-40 440 315) \" cx=\"440\" cy=\"315\" rx=\"200\" ry=\"115\"/>\n ";
@@ -494,27 +593,31 @@ void Venn::getPic(vector<SharedRAbundVector*> lookup, vector<Calculator*> vCalcs
                                        //A = red, B = green, C = blue, D = yellow
                        
                                        //place labels within overlaps
-                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(460 - ((int)toString(numA).length() / 2)) + "\" y=\"110\">" + toString(numA) + "</text>\n"; 
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(460 - ((int)toString(numA).length() / 2)) + "\" y=\"110\">" + toString(numA)  + "</text>\n"; 
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(460 - ((int)lookup[0]->getGroup().length() / 2)) + "\" y=\"90\">" + lookup[0]->getGroup() + "</text>\n";  
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(350 - ((int)toString(sharedAB).length() / 2)) + "\"  y=\"160\">" + toString(sharedAB) + "</text>\n";  
-                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(250 - ((int)toString(numB).length() / 2)) + "\"  y=\"110\">" + toString(numB) + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(250 - ((int)toString(numB).length() / 2)) + "\"  y=\"110\">" + toString(numB)  + "</text>\n";  
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(250 - ((int)lookup[1]->getGroup().length() / 2)) + "\"  y=\"90\">" + lookup[1]->getGroup() + "</text>\n"; 
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(490 - ((int)toString(sharedAC).length() / 2)) + "\"  y=\"190\">" + toString(sharedAC) + "</text>\n";  
                                        outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(550 - ((int)toString(numC).length() / 2)) + "\"   y=\"230\">" + toString(numC) + "</text>\n";  
                                        outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(550 - ((int)lookup[2]->getGroup().length() / 2)) + "\"   y=\"210\">" + lookup[2]->getGroup() + "</text>\n";
-                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(215 - ((int)toString(sharedBC).length() / 2)) + "\" y=\"190\">" + toString(sharedBC) + "</text>\n";  
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(215 - ((int)toString(sharedBD).length() / 2)) + "\" y=\"190\">" + toString(sharedBD) + "</text>\n";  
                                        outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(150 - ((int)toString(numD).length() / 2)) + "\"   y=\"230\">" + toString(numD) + "</text>\n";  
                                        outsvg << "<text fill=\"black\" class=\"seri\"  x=\"" + toString(150 - ((int)lookup[3]->getGroup().length() / 2)) + "\"   y=\"210\">" + lookup[3]->getGroup() + "</text>\n"; 
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(240 - ((int)toString(sharedAD).length() / 2)) + "\" y=\"325\">" + toString(sharedAD) + "</text>\n"; 
-                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(470 - ((int)toString(sharedBD).length() / 2)) + "\" y=\"325\">" + toString(sharedBD) + "</text>\n";
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(470 - ((int)toString(sharedBC).length() / 2)) + "\" y=\"325\">" + toString(sharedBC) + "</text>\n";
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(350 - ((int)toString(sharedCD).length() / 2)) + "\" y=\"430\">" + toString(sharedCD) + "</text>\n"; 
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(275 - ((int)toString(sharedABD).length() / 2)) + "\" y=\"240\">" + toString(sharedABD) + "</text>\n"; 
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(400 - ((int)toString(sharedBCD).length() / 2)) + "\" y=\"360\">" + toString(sharedBCD) + "</text>\n";
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(305 - ((int)toString(sharedACD).length() / 2)) + "\" y=\"360\">" + toString(sharedACD) + "</text>\n"; 
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(440 - ((int)toString(sharedABC).length() / 2)) + "\"  y=\"240\">" + toString(sharedABC) + "</text>\n"; 
                                        outsvg << "<text fill=\"black\" class=\"seri\" x=\"" + toString(350 - ((int)toString(sharedABCD).length() / 2)) + "\"  y=\"320\">" + toString(sharedABCD) + "</text>\n"; 
-                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"250\" y=\"490\">The total richness of all the groups is " + toString((float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD)) + "</text>\n";
-                       
+                                       
+                                       
+                                                                               
+                                       outsvg << "<text fill=\"black\" class=\"seri\" x=\"175\" y=\"770\">The total richness of all the groups is " + toString((float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD)) + "</text>\n";
+                                       
+
                                        outsvg << "</g>\n</svg>\n";
                                        outsvg.close();
                                        delete singleCalc;