]> git.donarmstrong.com Git - mothur.git/commitdiff
added sorted option to get.rabund command
authorwestcott <westcott>
Fri, 10 Jul 2009 15:53:50 +0000 (15:53 +0000)
committerwestcott <westcott>
Fri, 10 Jul 2009 15:53:50 +0000 (15:53 +0000)
13 files changed:
Mothur.xcodeproj/project.pbxproj
bellerophon.cpp [new file with mode: 0644]
bellerophon.h [new file with mode: 0644]
chimera.h [new file with mode: 0644]
chimeraseqscommand.cpp
chimeraseqscommand.h
getrabundcommand.cpp
getrabundcommand.h
pintail.cpp [new file with mode: 0644]
pintail.h [new file with mode: 0644]
rabundvector.cpp
rabundvector.hpp
systemcommand.cpp

index 7959c798a10a0e1745559369e95bbb5d77347f7f..9427eec004a889ab0481a0da9e7d47431367e046 100644 (file)
@@ -38,6 +38,8 @@
                3746109D0F40657600460C57 /* unifracunweightedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3746109C0F40657600460C57 /* unifracunweightedcommand.cpp */; };
                3749271D0FD58C840031C06B /* getsabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3749271C0FD58C840031C06B /* getsabundcommand.cpp */; };
                3749273F0FD5956B0031C06B /* getrabundcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3749273E0FD5956B0031C06B /* getrabundcommand.cpp */; };
+               374F2FEB100634B600E97C66 /* bellerophon.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 374F2FEA100634B600E97C66 /* bellerophon.cpp */; };
+               374F301310063B6F00E97C66 /* pintail.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 374F301210063B6F00E97C66 /* pintail.cpp */; };
                37519A6B0F80E6EB00FED5E8 /* sharedanderbergs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37519A6A0F80E6EB00FED5E8 /* sharedanderbergs.cpp */; };
                37519AA10F810D0200FED5E8 /* venncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37519AA00F810D0200FED5E8 /* venncommand.cpp */; };
                37519AB50F810FAE00FED5E8 /* venn.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37519AB40F810FAE00FED5E8 /* venn.cpp */; };
                3749271C0FD58C840031C06B /* getsabundcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getsabundcommand.cpp; sourceTree = SOURCE_ROOT; };
                3749273D0FD5956B0031C06B /* getrabundcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getrabundcommand.h; sourceTree = SOURCE_ROOT; };
                3749273E0FD5956B0031C06B /* getrabundcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getrabundcommand.cpp; sourceTree = SOURCE_ROOT; };
+               374F2FD51006320200E97C66 /* chimera.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimera.h; sourceTree = "<group>"; };
+               374F2FE9100634B600E97C66 /* bellerophon.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bellerophon.h; sourceTree = "<group>"; };
+               374F2FEA100634B600E97C66 /* bellerophon.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bellerophon.cpp; sourceTree = "<group>"; };
+               374F301110063B6F00E97C66 /* pintail.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pintail.h; sourceTree = "<group>"; };
+               374F301210063B6F00E97C66 /* pintail.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pintail.cpp; sourceTree = "<group>"; };
                37519A690F80E6EB00FED5E8 /* sharedanderbergs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sharedanderbergs.h; sourceTree = SOURCE_ROOT; };
                37519A6A0F80E6EB00FED5E8 /* sharedanderbergs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedanderbergs.cpp; sourceTree = SOURCE_ROOT; };
                37519A9F0F810D0200FED5E8 /* venncommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = venncommand.h; sourceTree = SOURCE_ROOT; };
                                373C68DC0FC1C38D00137ACD /* blastalign.hpp */,
                                373C68DB0FC1C38D00137ACD /* blastalign.cpp */,
                                37D928A60F2133C0001D4494 /* calculators */,
+                               374F2FE81006346600E97C66 /* chimera */,
                                37D927C20F21331F001D4494 /* cluster.hpp */,
                                37D927C10F21331F001D4494 /* cluster.cpp */,
                                37D928A90F2133E5001D4494 /* commands */,
                        name = Products;
                        sourceTree = "<group>";
                };
+               374F2FE81006346600E97C66 /* chimera */ = {
+                       isa = PBXGroup;
+                       children = (
+                               374F2FD51006320200E97C66 /* chimera.h */,
+                               374F2FE9100634B600E97C66 /* bellerophon.h */,
+                               374F2FEA100634B600E97C66 /* bellerophon.cpp */,
+                               374F301110063B6F00E97C66 /* pintail.h */,
+                               374F301210063B6F00E97C66 /* pintail.cpp */,
+                       );
+                       name = chimera;
+                       sourceTree = "<group>";
+               };
                3796441D0FB9B9650081FDB6 /* read */ = {
                        isa = PBXGroup;
                        children = (
                                37B73CA61004D89A008C4B41 /* getseqscommand.cpp in Sources */,
                                37B73CC01004EB38008C4B41 /* removeseqscommand.cpp in Sources */,
                                37B73CCF1004F5E0008C4B41 /* systemcommand.cpp in Sources */,
+                               374F2FEB100634B600E97C66 /* bellerophon.cpp in Sources */,
+                               374F301310063B6F00E97C66 /* pintail.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
diff --git a/bellerophon.cpp b/bellerophon.cpp
new file mode 100644 (file)
index 0000000..f2dd821
--- /dev/null
@@ -0,0 +1,409 @@
+/*
+ *  bellerophon.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/9/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "bellerophon.h"
+#include "eachgapdist.h"
+#include "ignoregaps.h"
+#include "onegapdist.h"
+
+
+//***************************************************************************************************************
+
+Bellerophon::Bellerophon(string name) {
+       try {
+               fastafile = name;
+       }
+       catch(exception& e) {
+               errorOut(e, "Bellerophon", "Bellerophon");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+void Bellerophon::print(ostream& out) {
+       try {
+               int above1 = 0;
+               out << "Name\tScore\tLeft\tRight\t" << endl;
+               //output prefenence structure to .chimeras file
+               for (int i = 0; i < pref.size(); i++) {
+                       out << pref[i].name << '\t' << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl;
+                       
+                       //calc # of seqs with preference above 1.0
+                       if (pref[i].score[0] > 1.0) { 
+                               above1++; 
+                               mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine();
+                               mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine();
+                       }
+               }
+               
+               //output results to screen
+               mothurOutEndLine();
+               mothurOut("Sequence with preference score above 1.0: " + toString(above1)); mothurOutEndLine();
+               int spot;
+               spot = pref.size()-1;
+               mothurOut("Minimum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.975;
+               mothurOut("2.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.75;
+               mothurOut("25%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.50;
+               mothurOut("Median: \t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.25;
+               mothurOut("75%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = pref.size() * 0.025;
+               mothurOut("97.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               spot = 0;
+               mothurOut("Maximum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Bellerophon", "print");
+               exit(1);
+       }
+}
+
+//********************************************************************************************************************
+//sorts highest score to lowest
+inline bool comparePref(Preference left, Preference right){
+       return (left.score[0] > right.score[0]);        
+}
+
+//***************************************************************************************************************
+void Bellerophon::getChimeras() {
+       try {
+               
+               //do soft filter
+               if (filter)  {
+                       string optionString = "fasta=" + fastafile + ", soft=50, vertical=F";
+                       filterSeqs = new FilterSeqsCommand(optionString);
+                       filterSeqs->execute();
+                       delete filterSeqs;
+                       
+                       //reset fastafile to filtered file
+                       fastafile = getRootName(fastafile) + "filter.fasta";
+               }
+               
+               distCalculator = new eachGapDist();
+               
+               //read in sequences
+               readSeqs();
+               
+               int numSeqs = seqs.size();
+               
+               if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); exit(1); }
+               
+               //set default window to 25% of sequence length
+               string seq0 = seqs[0].getAligned();
+               if (window == 0) { window = seq0.length() / 4;  }
+               else if (window > (seq0.length() / 2)) {  
+                       mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); mothurOutEndLine();
+                       window = (seq0.length() / 2);
+               }
+               
+               if (increment > (seqs[0].getAlignLength() - (2*window))) { 
+                       if (increment != 10) {
+                       
+                               mothurOut("You have selected a increment that is too large. I will use the default."); mothurOutEndLine();
+                               increment = 10;
+                               if (increment > (seqs[0].getAlignLength() - (2*window))) {  increment = 0;  }
+                               
+                       }else{ increment = 0; }
+               }
+cout << "increment = " << increment << endl;           
+               if (increment == 0) { iters = 1; }
+               else { iters = ((seqs[0].getAlignLength() - (2*window)) / increment); }
+               
+               //initialize pref
+               pref.resize(numSeqs);  
+               
+               for (int i = 0; i < numSeqs; i++ ) { 
+                       pref[i].leftParent.resize(2); pref[i].rightParent.resize(2); pref[i].score.resize(2);   pref[i].closestLeft.resize(2); pref[i].closestRight.resize(3);
+                       pref[i].name = seqs[i].getName();
+                       pref[i].score[0] = 0.0;  pref[i].score[1] = 0.0; 
+                       pref[i].closestLeft[0] = 100000.0;  pref[i].closestLeft[1] = 100000.0;  
+                       pref[i].closestRight[0] = 100000.0;  pref[i].closestRight[1] = 100000.0;  
+               }
+
+               int midpoint = window;
+               int count = 0;
+               while (count < iters) {
+                               
+                               //create 2 vectors of sequences, 1 for left side and one for right side
+                               vector<Sequence> left;  vector<Sequence> right;
+                               
+                               for (int i = 0; i < seqs.size(); i++) {
+//cout << "whole = " << seqs[i].getAligned() << endl;
+                                       //save left side
+                                       string seqLeft = seqs[i].getAligned().substr(midpoint-window, window);
+                                       Sequence tempLeft;
+                                       tempLeft.setName(seqs[i].getName());
+                                       tempLeft.setAligned(seqLeft);
+                                       left.push_back(tempLeft);
+//cout << "left = " << tempLeft.getAligned() << endl;                  
+                                       //save right side
+                                       string seqRight = seqs[i].getAligned().substr(midpoint, window);
+                                       Sequence tempRight;
+                                       tempRight.setName(seqs[i].getName());
+                                       tempRight.setAligned(seqRight);
+                                       right.push_back(tempRight);
+//cout << "right = " << seqRight << endl;      
+                               }
+                               
+                               //adjust midpoint by increment
+                               midpoint += increment;
+                               
+                               
+                               //this should be parallelized
+                               //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
+                               //create a matrix containing the distance from left to left and right to right
+                               //calculate distances
+                               SparseMatrix* SparseLeft = new SparseMatrix();
+                               SparseMatrix* SparseRight = new SparseMatrix();
+                               
+                               createSparseMatrix(0, left.size(), SparseLeft, left);
+                               createSparseMatrix(0, right.size(), SparseRight, right);
+                               
+                               vector<SeqMap> distMapRight;
+                               vector<SeqMap> distMapLeft;
+                               
+                               // Create a data structure to quickly access the distance information.
+                               // It consists of a vector of distance maps, where each map contains
+                               // all distances of a certain sequence. Vector and maps are accessed
+                               // via the index of a sequence in the distance matrix
+                               distMapRight = vector<SeqMap>(numSeqs); 
+                               distMapLeft = vector<SeqMap>(numSeqs); 
+                               //cout << "left" << endl << endl;
+                               for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
+                                       distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
+                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+                               }
+                               //cout << "right" << endl << endl;
+                               for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
+                                       distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
+                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+                               }
+                               
+                               delete SparseLeft;
+                               delete SparseRight;
+                               
+                               
+                               //fill preference structure
+                               generatePreferences(distMapLeft, distMapRight, midpoint);
+                               
+                               count++;
+                               
+               }
+               
+               delete distCalculator;
+               
+               //rank preference score to eachother
+               float dme = 0.0;
+               float expectedPercent = 1 / (float) (pref.size());
+               
+               for (int i = 0; i < pref.size(); i++) {  dme += pref[i].score[0];  }
+       
+               for (int i = 0; i < pref.size(); i++) {
+
+                       //gives the actual percentage of the dme this seq adds
+                       pref[i].score[0] = pref[i].score[0] / dme;
+                       
+                       //how much higher or lower is this than expected
+                       pref[i].score[0] = pref[i].score[0] / expectedPercent;
+                       
+               }
+               
+               
+               //sort Preferences highest to lowest
+               sort(pref.begin(), pref.end(), comparePref);
+
+
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Bellerophon", "getChimeras");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+void Bellerophon::readSeqs(){
+       try {
+               ifstream inFASTA;
+               openInputFile(fastafile, inFASTA);
+               
+               //read in seqs and store in vector
+               while(!inFASTA.eof()){
+                       Sequence current(inFASTA);
+                       
+                       if (current.getAligned() == "") { current.setAligned(current.getUnaligned()); }
+                       
+                       seqs.push_back(current);
+                       
+                       gobble(inFASTA);
+               }
+               inFASTA.close();
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Bellerophon", "readSeqs");
+               exit(1);
+       }
+}
+
+/***************************************************************************************************************/
+int Bellerophon::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector<Sequence> s){
+       try {
+
+               for(int i=startSeq; i<endSeq; i++){
+                       
+                       for(int j=0;j<i;j++){
+                       
+                               distCalculator->calcDist(s[i], s[j]);
+                               float dist = distCalculator->getDist();
+                       
+                               PCell temp(i, j, dist);
+                               sparse->addCell(temp);
+                               
+                       }
+               }
+                       
+       
+               return 1;
+       }
+       catch(exception& e) {
+               errorOut(e, "Bellerophon", "createSparseMatrix");
+               exit(1);
+       }
+}
+/***************************************************************************************************************/
+void Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
+       try {
+               
+               float dme = 0.0;
+               SeqMap::iterator itR;
+               SeqMap::iterator itL;
+               
+               //initialize pref[i]
+               for (int i = 0; i < pref.size(); i++) {
+                       pref[i].score[1] = 0.0;
+                       pref[i].closestLeft[1] = 100000.0; 
+                       pref[i].closestRight[1] = 100000.0; 
+                       pref[i].leftParent[1] = "";
+                       pref[i].rightParent[1] = "";
+               }
+       
+               for (int i = 0; i < left.size(); i++) {
+                       
+                       SeqMap currentLeft = left[i];    //example i = 3;   currentLeft is a map of 0 to the distance of sequence 3 to sequence 0,
+                                                                                               //                                                                              1 to the distance of sequence 3 to sequence 1,
+                                                                                               //                                                                              2 to the distance of sequence 3 to sequence 2.
+                       SeqMap currentRight = right[i];         // same as left but with distances on the right side.
+                       
+                       for (int j = 0; j < i; j++) {
+                               
+                               itL = currentLeft.find(j);
+                               itR = currentRight.find(j);
+//cout << " i = " << i << " j = " << j << " distLeft = " << itL->second << endl;
+//cout << " i = " << i << " j = " << j << " distright = " << itR->second << endl;
+                               
+                               //if you can find this entry update the preferences
+                               if ((itL != currentLeft.end()) && (itR != currentRight.end())) {
+                               
+                                       if (!correction) {
+                                               pref[i].score[1] += abs((itL->second - itR->second));
+                                               pref[j].score[1] += abs((itL->second - itR->second));
+//cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
+//cout << "abs = " << abs((itL->second - itR->second)) << endl;
+//cout << i << " score = " << pref[i].score[1] << endl;
+//cout << j << " score = " << pref[j].score[1] << endl;
+                                       }else {
+                                               pref[i].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
+                                               pref[j].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
+//cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
+//cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl;
+//cout << i << " score = " << pref[i].score[1] << endl;
+//cout << j << " score = " << pref[j].score[1] << endl;
+                                       }
+//cout << "pref[" << i << "].closestLeft[1] = "        <<      pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl;                        
+                                       //are you the closest left sequence
+                                       if (itL->second < pref[i].closestLeft[1]) {  
+
+                                               pref[i].closestLeft[1] = itL->second;
+                                               pref[i].leftParent[1] = seqs[j].getName();
+//cout << "updating closest left to " << pref[i].leftParent[1] << endl;
+                                       }
+//cout << "pref[" << j << "].closestLeft[1] = "        <<      pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;        
+                                       if (itL->second < pref[j].closestLeft[1]) { 
+                                               pref[j].closestLeft[1] = itL->second;
+                                               pref[j].leftParent[1] = seqs[i].getName();
+//cout << "updating closest left to " << pref[j].leftParent[1] << endl;
+                                       }
+                                       
+                                       //are you the closest right sequence
+                                       if (itR->second < pref[i].closestRight[1]) {   
+                                               pref[i].closestRight[1] = itR->second;
+                                               pref[i].rightParent[1] = seqs[j].getName();
+                                       }
+                                       if (itR->second < pref[j].closestRight[1]) {   
+                                               pref[j].closestRight[1] = itR->second;
+                                               pref[j].rightParent[1] = seqs[i].getName();
+                                       }
+                                       
+                               }
+                       }
+               
+               }
+               
+               
+                 
+               //calculate the dme
+               
+               int count0 = 0;
+               for (int i = 0; i < pref.size(); i++) {  dme += pref[i].score[1];  if (pref[i].score[1] == 0.0) { count0++; }  }
+               
+               float expectedPercent = 1 / (float) (pref.size() - count0);
+//cout << endl << "dme = " << dme << endl;
+               //recalculate prefernences based on dme
+               for (int i = 0; i < pref.size(); i++) {
+//cout << "unadjusted pref " << i << " = " << pref[i].score[1] << endl;        
+                       // gives the actual percentage of the dme this seq adds
+                       pref[i].score[1] = pref[i].score[1] / dme;
+                       
+                       //how much higher or lower is this than expected
+                       pref[i].score[1] = pref[i].score[1] / expectedPercent;
+                       
+                       //pref[i].score[1] = dme / (dme - 2 * pref[i].score[1]);
+                       
+                       //so a non chimeric sequence would be around 1, and a chimeric would be signifigantly higher.
+//cout << "adjusted pref " << i << " = " << pref[i].score[1] << endl;                                  
+               }
+               
+               //is this score bigger then the last score
+               for (int i = 0; i < pref.size(); i++) {  
+                       
+                       //update biggest score
+                       if (pref[i].score[1] > pref[i].score[0]) {
+                               pref[i].score[0] = pref[i].score[1];
+                               pref[i].leftParent[0] = pref[i].leftParent[1];
+                               pref[i].rightParent[0] = pref[i].rightParent[1];
+                               pref[i].closestLeft[0] = pref[i].closestLeft[1];
+                               pref[i].closestRight[0] = pref[i].closestRight[1];
+                               pref[i].midpoint = mid;
+                       }
+                       
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Bellerophon", "generatePreferences");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
diff --git a/bellerophon.h b/bellerophon.h
new file mode 100644 (file)
index 0000000..3f28e5b
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef BELLEROPHON_H
+#define BELLEROPHON_H
+
+/*
+ *  bellerophon.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/9/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+
+#include "chimera.h"
+#include "filterseqscommand.h"
+#include "sequence.hpp"
+#include "dist.h"
+
+struct Preference {
+               string name;
+               vector<string> leftParent; //keep the name of closest left associated with the two scores
+               vector<string> rightParent; //keep the name of closest right associated with the two scores
+               vector<float> score;  //so you can keep last score and calc this score and keep whichever is bigger.
+               vector<float> closestLeft;  //keep the closest left associated with the two scores
+               vector<float> closestRight; //keep the closest right associated with the two scores
+               int midpoint;
+
+};
+
+
+/***********************************************************/
+
+class Bellerophon : public Chimera {
+       
+       public:
+               Bellerophon(string);    
+               ~Bellerophon() {};
+               
+               void getChimeras();
+               void print(ostream&);
+               
+               
+       private:
+               Dist* distCalculator;
+               FilterSeqsCommand* filterSeqs;
+               vector<Sequence> seqs;
+               vector<Preference> pref;
+               string fastafile;
+               int iters;
+               
+               void readSeqs();
+               void generatePreferences(vector<SeqMap>, vector<SeqMap>, int);
+               int createSparseMatrix(int, int, SparseMatrix*, vector<Sequence>);
+       
+
+               
+
+};
+
+/***********************************************************/
+
+#endif
+
diff --git a/chimera.h b/chimera.h
new file mode 100644 (file)
index 0000000..07ad92f
--- /dev/null
+++ b/chimera.h
@@ -0,0 +1,53 @@
+#ifndef CHIMERA_H
+#define CHIMERA_H
+
+/*
+ *  chimera.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/9/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+
+#include "mothur.h"
+#include "sparsematrix.hpp"
+
+typedef list<PCell>::iterator MatData;
+typedef map<int, float> SeqMap;  //maps sequence to all distance for that seqeunce
+
+
+/***********************************************************************/
+
+class Chimera {
+
+       public:
+       
+               Chimera(){};
+               Chimera(string);
+               virtual ~Chimera(){};
+               virtual void setFilter(bool f)                  {       filter = f;                     }
+               virtual void setCorrection(bool c)              {       correction = c;         }
+               virtual void setProcessors(int p)               {       processors = p;         }
+               virtual void setWindow(int w)                   {       window = w;                     }
+               virtual void setIncrement(int i)                {       increment = i;          }
+               virtual void setTemplate(string t)              {       templateFile = t;       }
+               
+               //pure functions
+               virtual void getChimeras() = 0; 
+               virtual void print(ostream&) = 0;       
+               
+       protected:
+               
+               bool filter, correction;
+               int processors, window, increment;
+               string templateFile;
+       
+
+};
+
+/***********************************************************************/
+
+#endif
+
index a334ea31ccc2a7fd0334f687d3f84c16c00d3494..105fd11b90afd8ce700bb3b20b66bcc9b39bdbb2 100644 (file)
@@ -8,9 +8,8 @@
  */
 
 #include "chimeraseqscommand.h"
-#include "eachgapdist.h"
-#include "ignoregaps.h"
-#include "onegapdist.h"
+#include "bellerophon.h"
+#include "pintail.h"
 
 //***************************************************************************************************************
 
@@ -23,7 +22,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -41,6 +40,11 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        if (fastafile == "not open") { abort = true; }
                        else if (fastafile == "not found") { fastafile = ""; mothurOut("fasta is a required parameter for the chimera.seqs command."); mothurOutEndLine(); abort = true;  }     
                        
+                       templatefile = validParameter.validFile(parameters, "template", true);
+                       if (templatefile == "not open") { abort = true; }
+                       else if (templatefile == "not found") { templatefile = "";  }   
+                       
+
                        string temp;
                        temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "T"; }
                        filter = isTrue(temp);
@@ -57,9 +61,10 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        temp = validParameter.validFile(parameters, "increment", false);                        if (temp == "not found") { temp = "10"; }
                        convert(temp, increment);
                                
-                       method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "bellerophon"; }
+                       method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "pintail"; }
+                       
+                       if ((method == "pintail") && (templatefile == "")) { mothurOut("You must provide a template file with the pintail method."); mothurOutEndLine(); abort = true;  }
                        
-                       if (method != "bellerophon") { mothurOut(method + " is not a valid method."); mothurOutEndLine();  abort = true; }
 
                }
        }
@@ -75,9 +80,9 @@ void ChimeraSeqsCommand::help(){
                mothurOut("The chimera.seqs command reads a fastafile and creates a sorted priority score list of potentially chimeric sequences (ideally, the sequences should already be aligned).\n");
                mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors and method.  fasta is required.\n");
                mothurOut("The filter parameter allows you to specify if you would like to apply a 50% soft filter.  The default is false. \n");
-               mothurOut("The correction parameter allows you to .....  The default is true. \n");
+               mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.   The default is true. \n");
                mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
-               mothurOut("The method parameter allows you to specify the method for finding chimeric sequences.  The default is bellerophon. \n");
+               mothurOut("The method parameter allows you to specify the method for finding chimeric sequences.  The default is pintail. \n");
                mothurOut("The chimera.seqs command should be in the following format: \n");
                mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
                mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, processors=2, method=yourMethod) \n");
@@ -88,11 +93,6 @@ void ChimeraSeqsCommand::help(){
                exit(1);
        }
 }
-//********************************************************************************************************************
-//sorts highest score to lowest
-inline bool comparePref(Preference left, Preference right){
-       return (left.score[0] > right.score[0]);        
-}
 
 //***************************************************************************************************************
 
@@ -105,373 +105,40 @@ int ChimeraSeqsCommand::execute(){
                
                if (abort == true) { return 0; }
                
+               if (method == "bellerophon")    {       chimera = new Bellerophon(fastafile);   }
+               else if (method == "pintail")   {       chimera = new Pintail(fastafile);               }
+               else { mothurOut("Not a valid method."); mothurOutEndLine(); return 0;          }
                
-               //do soft filter
-               if (filter)  {
-                       string optionString = "fasta=" + fastafile + ", soft=50, vertical=F";
-                       filterSeqs = new FilterSeqsCommand(optionString);
-                       filterSeqs->execute();
-                       delete filterSeqs;
-                       
-                       //reset fastafile to filtered file
-                       fastafile = getRootName(fastafile) + "filter.fasta";
-               }
-               
-               distCalculator = new eachGapDist();
-               
-               //read in sequences
-               readSeqs();
+               //set user options
+               chimera->setFilter(filter);
+               chimera->setCorrection(correction);
+               chimera->setProcessors(processors);
+               chimera->setWindow(window);
+               chimera->setIncrement(increment);
+               chimera->setTemplate(templatefile); 
                
-               int numSeqs = seqs.size();
-               
-               if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); return 0; }
-               
-               //set default window to 25% of sequence length
-               string seq0 = seqs[0].getAligned();
-               if (window == 0) { window = seq0.length() / 4;  }
-               else if (window > (seq0.length() / 2)) {  
-                       mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); mothurOutEndLine();
-                       window = (seq0.length() / 2);
-               }
-               
-               if (increment > (seqs[0].getAlignLength() - (2*window))) { 
-                       if (increment != 10) {
-                       
-                               mothurOut("You have selected a increment that is too large. I will use the default."); mothurOutEndLine();
-                               increment = 10;
-                               if (increment > (seqs[0].getAlignLength() - (2*window))) {  increment = 0;  }
-                               
-                       }else{ increment = 0; }
-               }
-cout << "increment = " << increment << endl;           
-               if (increment == 0) { iters = 1; }
-               else { iters = ((seqs[0].getAlignLength() - (2*window)) / increment); }
+               //find chimeras
+               chimera->getChimeras();
                
-               //initialize pref
-               pref.resize(numSeqs);  
-               
-               for (int i = 0; i < numSeqs; i++ ) { 
-                       pref[i].leftParent.resize(2); pref[i].rightParent.resize(2); pref[i].score.resize(2);   pref[i].closestLeft.resize(2); pref[i].closestRight.resize(3);
-                       pref[i].name = seqs[i].getName();
-                       pref[i].score[0] = 0.0;  pref[i].score[1] = 0.0; 
-                       pref[i].closestLeft[0] = 100000.0;  pref[i].closestLeft[1] = 100000.0;  
-                       pref[i].closestRight[0] = 100000.0;  pref[i].closestRight[1] = 100000.0;  
-               }
-
-               int midpoint = window;
-               int count = 0;
-               while (count < iters) {
-                               
-                               //create 2 vectors of sequences, 1 for left side and one for right side
-                               vector<Sequence> left;  vector<Sequence> right;
-                               
-                               for (int i = 0; i < seqs.size(); i++) {
-//cout << "whole = " << seqs[i].getAligned() << endl;
-                                       //save left side
-                                       string seqLeft = seqs[i].getAligned().substr(midpoint-window, window);
-                                       Sequence tempLeft;
-                                       tempLeft.setName(seqs[i].getName());
-                                       tempLeft.setAligned(seqLeft);
-                                       left.push_back(tempLeft);
-//cout << "left = " << tempLeft.getAligned() << endl;                  
-                                       //save right side
-                                       string seqRight = seqs[i].getAligned().substr(midpoint, window);
-                                       Sequence tempRight;
-                                       tempRight.setName(seqs[i].getName());
-                                       tempRight.setAligned(seqRight);
-                                       right.push_back(tempRight);
-//cout << "right = " << seqRight << endl;      
-                               }
-                               
-                               //adjust midpoint by increment
-                               midpoint += increment;
-                               
-                               
-                               //this should be parallelized
-                               //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
-                               //create a matrix containing the distance from left to left and right to right
-                               //calculate distances
-                               SparseMatrix* SparseLeft = new SparseMatrix();
-                               SparseMatrix* SparseRight = new SparseMatrix();
-                               
-                               createSparseMatrix(0, left.size(), SparseLeft, left);
-                               createSparseMatrix(0, right.size(), SparseRight, right);
-                               
-                               vector<SeqMap> distMapRight;
-                               vector<SeqMap> distMapLeft;
-                               
-                               // Create a data structure to quickly access the distance information.
-                               // It consists of a vector of distance maps, where each map contains
-                               // all distances of a certain sequence. Vector and maps are accessed
-                               // via the index of a sequence in the distance matrix
-                               distMapRight = vector<SeqMap>(numSeqs); 
-                               distMapLeft = vector<SeqMap>(numSeqs); 
-                               //cout << "left" << endl << endl;
-                               for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
-                                       distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
-                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
-                               }
-                               //cout << "right" << endl << endl;
-                               for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
-                                       distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
-                                       //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
-                               }
-                               
-                               delete SparseLeft;
-                               delete SparseRight;
-                               
-                               
-                               //fill preference structure
-                               generatePreferences(distMapLeft, distMapRight, midpoint);
-                               
-                               count++;
-                               
-               }
-               
-               delete distCalculator;
-               
-               //rank preference score to eachother
-               float dme = 0.0;
-               float expectedPercent = 1 / (float) (pref.size());
-               
-               for (int i = 0; i < pref.size(); i++) {  dme += pref[i].score[0];  }
-       
-               for (int i = 0; i < pref.size(); i++) {
-
-                       //gives the actual percentage of the dme this seq adds
-                       pref[i].score[0] = pref[i].score[0] / dme;
-                       
-                       //how much higher or lower is this than expected
-                       pref[i].score[0] = pref[i].score[0] / expectedPercent;
-                       
-               }
-               
-               
-               //sort Preferences highest to lowest
-               sort(pref.begin(), pref.end(), comparePref);
-               
-               string outputFileName = getRootName(fastafile) + "chimeras";
+               string outputFileName = getRootName(fastafile) + method + ".chimeras";
                ofstream out;
                openOutputFile(outputFileName, out);
                
-               int above1 = 0;
-               out << "Name\tScore\tLeft\tRight\t" << endl;
-               //output prefenence structure to .chimeras file
-               for (int i = 0; i < pref.size(); i++) {
-                       out << pref[i].name << '\t' << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl;
-                       
-                       //calc # of seqs with preference above 1.0
-                       if (pref[i].score[0] > 1.0) { 
-                               above1++; 
-                               mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine();
-                               mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine();
-                       }
-                       
-                       
-               }
+               //print results
+               chimera->print(out);
                
-               //output results to screen
-               mothurOutEndLine();
-               mothurOut("Sequence with preference score above 1.0: " + toString(above1)); mothurOutEndLine();
-               int spot;
-               spot = pref.size()-1;
-               mothurOut("Minimum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
-               spot = pref.size() * 0.975;
-               mothurOut("2.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
-               spot = pref.size() * 0.75;
-               mothurOut("25%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
-               spot = pref.size() * 0.50;
-               mothurOut("Median: \t" + toString(pref[spot].score[0])); mothurOutEndLine();
-               spot = pref.size() * 0.25;
-               mothurOut("75%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
-               spot = pref.size() * 0.025;
-               mothurOut("97.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
-               spot = 0;
-               mothurOut("Maximum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+               out.close();
                
-               return 0;
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "execute");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-void ChimeraSeqsCommand::readSeqs(){
-       try {
-               ifstream inFASTA;
-               openInputFile(fastafile, inFASTA);
+               delete chimera;
                
-               //read in seqs and store in vector
-               while(!inFASTA.eof()){
-                       Sequence current(inFASTA);
-                       
-                       if (current.getAligned() == "") { current.setAligned(current.getUnaligned()); }
-                       
-                       seqs.push_back(current);
-                       
-                       gobble(inFASTA);
-               }
-               inFASTA.close();
-
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "readSeqs");
-               exit(1);
-       }
-}
-
-/***************************************************************************************************************/
-int ChimeraSeqsCommand::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector<Sequence> s){
-       try {
-
-               for(int i=startSeq; i<endSeq; i++){
-                       
-                       for(int j=0;j<i;j++){
-                       
-                               distCalculator->calcDist(s[i], s[j]);
-                               float dist = distCalculator->getDist();
-                       
-                               PCell temp(i, j, dist);
-                               sparse->addCell(temp);
-                               
-                       }
-               }
-                       
-       
-               return 1;
-       }
-       catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "createSparseMatrix");
-               exit(1);
-       }
-}
-/***************************************************************************************************************/
-void ChimeraSeqsCommand::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
-       try {
-               
-               float dme = 0.0;
-               SeqMap::iterator itR;
-               SeqMap::iterator itL;
-               
-               //initialize pref[i]
-               for (int i = 0; i < pref.size(); i++) {
-                       pref[i].score[1] = 0.0;
-                       pref[i].closestLeft[1] = 100000.0; 
-                       pref[i].closestRight[1] = 100000.0; 
-                       pref[i].leftParent[1] = "";
-                       pref[i].rightParent[1] = "";
-               }
-       
-               for (int i = 0; i < left.size(); i++) {
-                       
-                       SeqMap currentLeft = left[i];    //example i = 3;   currentLeft is a map of 0 to the distance of sequence 3 to sequence 0,
-                                                                                               //                                                                              1 to the distance of sequence 3 to sequence 1,
-                                                                                               //                                                                              2 to the distance of sequence 3 to sequence 2.
-                       SeqMap currentRight = right[i];         // same as left but with distances on the right side.
-                       
-                       for (int j = 0; j < i; j++) {
-                               
-                               itL = currentLeft.find(j);
-                               itR = currentRight.find(j);
-//cout << " i = " << i << " j = " << j << " distLeft = " << itL->second << endl;
-//cout << " i = " << i << " j = " << j << " distright = " << itR->second << endl;
-                               
-                               //if you can find this entry update the preferences
-                               if ((itL != currentLeft.end()) && (itR != currentRight.end())) {
-                               
-                                       if (!correction) {
-                                               pref[i].score[1] += abs((itL->second - itR->second));
-                                               pref[j].score[1] += abs((itL->second - itR->second));
-//cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
-//cout << "abs = " << abs((itL->second - itR->second)) << endl;
-//cout << i << " score = " << pref[i].score[1] << endl;
-//cout << j << " score = " << pref[j].score[1] << endl;
-                                       }else {
-                                               pref[i].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
-                                               pref[j].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
-//cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
-//cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl;
-//cout << i << " score = " << pref[i].score[1] << endl;
-//cout << j << " score = " << pref[j].score[1] << endl;
-                                       }
-//cout << "pref[" << i << "].closestLeft[1] = "        <<      pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl;                        
-                                       //are you the closest left sequence
-                                       if (itL->second < pref[i].closestLeft[1]) {  
-
-                                               pref[i].closestLeft[1] = itL->second;
-                                               pref[i].leftParent[1] = seqs[j].getName();
-//cout << "updating closest left to " << pref[i].leftParent[1] << endl;
-                                       }
-//cout << "pref[" << j << "].closestLeft[1] = "        <<      pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;        
-                                       if (itL->second < pref[j].closestLeft[1]) { 
-                                               pref[j].closestLeft[1] = itL->second;
-                                               pref[j].leftParent[1] = seqs[i].getName();
-//cout << "updating closest left to " << pref[j].leftParent[1] << endl;
-                                       }
-                                       
-                                       //are you the closest right sequence
-                                       if (itR->second < pref[i].closestRight[1]) {   
-                                               pref[i].closestRight[1] = itR->second;
-                                               pref[i].rightParent[1] = seqs[j].getName();
-                                       }
-                                       if (itR->second < pref[j].closestRight[1]) {   
-                                               pref[j].closestRight[1] = itR->second;
-                                               pref[j].rightParent[1] = seqs[i].getName();
-                                       }
-                                       
-                               }
-                       }
-               
-               }
-               
-               
-                 
-               //calculate the dme
-               
-               int count0 = 0;
-               for (int i = 0; i < pref.size(); i++) {  dme += pref[i].score[1];  if (pref[i].score[1] == 0.0) { count0++; }  }
-               
-               float expectedPercent = 1 / (float) (pref.size() - count0);
-//cout << endl << "dme = " << dme << endl;
-               //recalculate prefernences based on dme
-               for (int i = 0; i < pref.size(); i++) {
-//cout << "unadjusted pref " << i << " = " << pref[i].score[1] << endl;        
-                       // gives the actual percentage of the dme this seq adds
-                       pref[i].score[1] = pref[i].score[1] / dme;
-                       
-                       //how much higher or lower is this than expected
-                       pref[i].score[1] = pref[i].score[1] / expectedPercent;
-                       
-                       //pref[i].score[1] = dme / (dme - 2 * pref[i].score[1]);
-                       
-                       //so a non chimeric sequence would be around 1, and a chimeric would be signifigantly higher.
-//cout << "adjusted pref " << i << " = " << pref[i].score[1] << endl;                                  
-               }
+               return 0;
                
-               //is this score bigger then the last score
-               for (int i = 0; i < pref.size(); i++) {  
-                       
-                       //update biggest score
-                       if (pref[i].score[1] > pref[i].score[0]) {
-                               pref[i].score[0] = pref[i].score[1];
-                               pref[i].leftParent[0] = pref[i].leftParent[1];
-                               pref[i].rightParent[0] = pref[i].rightParent[1];
-                               pref[i].closestLeft[0] = pref[i].closestLeft[1];
-                               pref[i].closestRight[0] = pref[i].closestRight[1];
-                               pref[i].midpoint = mid;
-                       }
-                       
-               }
-
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraSeqsCommand", "generatePreferences");
+               errorOut(e, "ChimeraSeqsCommand", "execute");
                exit(1);
        }
 }
-/**************************************************************************************************/
 
 /**************************************************************************************************/
 
index 2398ce63b257935d857e06bf6525b6b94e8efd05..dc7df8fad969d33a2fa51f46718f5aa4812321f1 100644 (file)
 
 #include "mothur.h"
 #include "command.hpp"
-#include "filterseqscommand.h"
-#include "sequence.hpp"
-#include "sparsematrix.hpp"
-#include "dist.h"
-
-typedef list<PCell>::iterator MatData;
-typedef map<int, float> SeqMap;  //maps sequence to all distance for that seqeunce
-
-struct Preference {
-               string name;
-               vector<string> leftParent; //keep the name of closest left associated with the two scores
-               vector<string> rightParent; //keep the name of closest right associated with the two scores
-               vector<float> score;  //so you can keep last score and calc this score and keep whichever is bigger.
-               vector<float> closestLeft;  //keep the closest left associated with the two scores
-               vector<float> closestRight; //keep the closest right associated with the two scores
-               int midpoint;
-
-};
-
+#include "chimera.h"
 
 
 /***********************************************************/
@@ -45,21 +27,13 @@ public:
                
 private:
        
-       Dist* distCalculator;
        bool abort;
-       string method, fastafile;
+       string method, fastafile, templatefile;
        bool filter, correction;
        int processors, midpoint, averageLeft, averageRight, window, iters, increment;
-       FilterSeqsCommand* filterSeqs;
-       ListVector* list;
-       vector<Sequence> seqs;
-       vector<Preference> pref;
+       Chimera* chimera;
        
-       void readSeqs();
-       void generatePreferences(vector<SeqMap>, vector<SeqMap>, int);
-       int createSparseMatrix(int, int, SparseMatrix*, vector<Sequence>);
        
-
 };
 
 /***********************************************************/
index 73862a85eda66077fdc51b9bbebc1dc579c8da02..b0bf81980a54d7bb6f8992b49f0af83d3169f4f8 100644 (file)
@@ -24,7 +24,7 @@ GetRAbundCommand::GetRAbundCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"line","label"};
+                       string Array[] =  {"line","label","sorted"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -42,6 +42,11 @@ GetRAbundCommand::GetRAbundCommand(string option){
                        
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
+                       
+                       string temp;
+                       temp = validParameter.validFile(parameters, "sorted", false);                   if (temp == "not found") { temp = "T"; }
+                       sorted = isTrue(temp);
+                       
                        line = validParameter.validFile(parameters, "line", false);                             
                        if (line == "not found") { line = "";  }
                        else { 
@@ -82,10 +87,11 @@ GetRAbundCommand::GetRAbundCommand(string option){
 void GetRAbundCommand::help(){
        try {
                mothurOut("The get.rabund command can only be executed after a successful read.otu of a listfile.\n");
-               mothurOut("The get.rabund command parameters are line and label.  No parameters are required, and you may not use line and label at the same time.\n");
+               mothurOut("The get.rabund command parameters are line, label and sorted.  No parameters are required, and you may not use line and label at the same time.\n");
                mothurOut("The line and label allow you to select what distance levels you would like included in your .rabund file, and are separated by dashes.\n");
-               mothurOut("The get.rabund command should be in the following format: get.rabund(line=yourLines, label=yourLabels).\n");
-               mothurOut("Example get.rabund(line=1-3-5).\n");
+               mothurOut("The sorted parameters allows you to print the rabund results sorted by abundance or not.  The default is sorted.\n");
+               mothurOut("The get.rabund command should be in the following format: get.rabund(line=yourLines, label=yourLabels, sorted=yourSorted).\n");
+               mothurOut("Example get.rabund(line=1-3-5, sorted=F).\n");
                mothurOut("The default value for line and label are all lines in your inputfile.\n");
                mothurOut("The get.rabund command outputs a .rabund file containing the lines you selected.\n");
                mothurOut("Note: No spaces between parameter labels (i.e. line), '=' and parameters (i.e.yourLines).\n\n");
@@ -129,9 +135,12 @@ int GetRAbundCommand::execute(){
                        
                        if(allLines == 1 || lines.count(count) == 1 || labels.count(list->getLabel()) == 1){
                                        mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
-                                       rabund = new RAbundVector();
+                                       rabund = new RAbundVector();                            
                                        *rabund = (list->getRAbundVector());
-                                       rabund->print(out);
+                                       
+                                       if(sorted)      {   rabund->print(out);                         }
+                                       else            {       rabund->nonSortedPrint(out);    }
+                                       
                                        delete rabund;
                                                                                                                        
                                        processedLabels.insert(list->getLabel());
@@ -146,7 +155,10 @@ int GetRAbundCommand::execute(){
                                        mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
                                        rabund = new RAbundVector();
                                        *rabund = (list->getRAbundVector());
-                                       rabund->print(out);
+                                       
+                                       if(sorted)      {   rabund->print(out);                         }
+                                       else            {       rabund->nonSortedPrint(out);    }
+
                                        delete rabund;
 
                                        processedLabels.insert(list->getLabel());
@@ -181,7 +193,10 @@ int GetRAbundCommand::execute(){
                        mothurOut(list->getLabel() + "\t" + toString(count)); mothurOutEndLine();
                        rabund = new RAbundVector();
                        *rabund = (list->getRAbundVector());
-                       rabund->print(out);
+                       
+                       if(sorted)      {   rabund->print(out);                         }
+                       else            {       rabund->nonSortedPrint(out);    }
+
                        delete rabund;
                        delete list;
                }
index 38b5ec7643fdd4e79e4d70408c59ba9fe3210be0..92e8475d154db06761e52ada6d72120bb586ce16 100644 (file)
@@ -34,7 +34,7 @@ private:
        ListVector* list;
        RAbundVector* rabund;
 
-       bool abort, allLines;
+       bool abort, allLines, sorted;
        set<int> lines; //hold lines to be used
        set<string> labels; //holds labels to be used
        string line, label;
diff --git a/pintail.cpp b/pintail.cpp
new file mode 100644 (file)
index 0000000..6ccccde
--- /dev/null
@@ -0,0 +1,587 @@
+/*
+ *  pintail.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/9/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "pintail.h"
+#include "ignoregaps.h"
+
+//***************************************************************************************************************
+
+Pintail::Pintail(string name) {
+       try {
+               fastafile = name;
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "Pintail");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+Pintail::~Pintail() {
+       try {
+               for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
+               for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "~Pintail");
+               exit(1);
+       }
+}      
+//***************************************************************************************************************
+void Pintail::print(ostream& out) {
+       try {
+               
+               for (itCoef = DE.begin(); itCoef != DE.end(); itCoef++) {
+               
+                       out << itCoef->first->getName() << '\t' << itCoef->second << endl;
+                       out << "Observed\t";
+                       
+                       itObsDist = obsDistance.find(itCoef->first);
+                       for (int i = 0; i < itObsDist->second.size(); i++) {  out << itObsDist->second[i] << '\t';  }
+                       out << endl;
+                       
+                       out << "Expected\t";
+                       
+                       itExpDist = expectedDistance.find(itCoef->first);
+                       for (int i = 0; i < itExpDist->second.size(); i++) {  out << itExpDist->second[i] << '\t';  }
+                       out << endl;
+                       
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "print");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+void Pintail::getChimeras() {
+       try {
+               
+               distCalculator = new ignoreGaps();
+               
+               //read in query sequences and subject sequences
+               mothurOut("Reading sequences and template file... "); cout.flush();
+               querySeqs = readSeqs(fastafile);
+               templateSeqs = readSeqs(templateFile);
+               mothurOut("Done."); mothurOutEndLine();
+               
+               int numSeqs = querySeqs.size();
+               
+               //if window is set to default
+               if (window == 0) {  if (querySeqs[0]->getAligned().length() > 800) {  setWindow(200); }
+                                                       else{  setWindow((querySeqs[0]->getAligned().length() / 4)); }  } 
+       
+               //calculate number of iters
+               iters = (querySeqs[0]->getAligned().length() - window + 1) / increment;
+               
+               int linesPerProcess = processors / numSeqs;
+               
+               //find breakup of sequences for all times we will Parallelize
+               if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
+               else {
+                       //fill line pairs
+                       for (int i = 0; i < (processors-1); i++) {                      
+                               lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
+                       }
+                       //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
+                       int i = processors - 1;
+                       lines.push_back(new linePair((i*linesPerProcess), numSeqs));
+               }
+               
+               //map query sequences to their most similiar sequences in the template - Parallelized
+               mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
+               if (processors == 1) {   findPairs(lines[0]->start, lines[0]->end);  } 
+               else {          createProcessesPairs();         }
+               mothurOut("Done."); mothurOutEndLine();
+       //      itBest = bestfit.begin();
+       //      itBest++; itBest++;
+//cout << itBest->first->getName() << '\t' << itBest->second->getName()        << endl;        
+               //find Oqs for each sequence - the observed distance in each window - Parallelized
+               mothurOut("Calculating observed percentage differences for each sequence... "); cout.flush();
+               if (processors == 1) {   calcObserved(lines[0]->start, lines[0]->end);  } 
+               else {          createProcessesObserved();              }
+               mothurOut("Done."); mothurOutEndLine();
+               
+               //find P
+               mothurOut("Calculating expected percentage differences for each sequence... "); cout.flush();
+               vector<float> probabilityProfile = calcFreq(templateSeqs);
+               
+               //make P into Q
+               for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];      }
+//cout << "after p into Q" << endl;            
+               //find Qav
+               averageProbability = findQav(probabilityProfile);
+//cout << "after Qav" << endl;         
+               //find Coefficient - maps a sequence to its coefficient
+               seqCoef = getCoef(averageProbability);
+//cout << "after coef" << endl;                
+               //find Eqs for each sequence - the expected distance in each window - Parallelized
+               if (processors == 1) {   calcExpected(lines[0]->start, lines[0]->end);  } 
+               else {          createProcessesExpected();              }
+               mothurOut("Done."); mothurOutEndLine();
+               
+               //find deviation - Parallelized
+               mothurOut("Finding deviation from expected... "); cout.flush();
+               if (processors == 1) {   calcDE(lines[0]->start, lines[0]->end);  } 
+               else {          createProcessesDE();            }
+               mothurOut("Done."); mothurOutEndLine();
+               
+                               
+               //free memory
+               for (int i = 0; i < lines.size(); i++)                  {       delete lines[i];                }
+               delete distCalculator;  
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "getChimeras");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+vector<Sequence*> Pintail::readSeqs(string file) {
+       try {
+       
+               ifstream in;
+               openInputFile(file, in);
+               vector<Sequence*> container;
+               
+               //read in seqs and store in vector
+               while(!in.eof()){
+                       Sequence* current = new Sequence(in);
+                       
+                       if (current->getAligned() == "") { current->setAligned(current->getUnaligned()); }
+                       
+                       container.push_back(current);
+                       
+                       gobble(in);
+               }
+               
+               in.close();
+               return container;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "readSeqs");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+//calculate the distances from each query sequence to all sequences in the template to find the closest sequence
+void Pintail::findPairs(int start, int end) {
+       try {
+               
+               for(int i = start; i < end; i++){
+               
+                       float smallest = 10000.0;
+                       Sequence query = *(querySeqs[i]);
+               
+                       for(int j = 0; j < templateSeqs.size(); j++){
+                               
+                               Sequence temp = *(templateSeqs[j]);
+                               
+                               distCalculator->calcDist(query, temp);
+                               float dist = distCalculator->getDist();
+                               
+                               if (dist < smallest) { 
+                               
+                                       bestfit[querySeqs[i]] = templateSeqs[j];
+                                       smallest = dist;
+                               }
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "findPairs");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+void Pintail::calcObserved(int start, int end) {
+       try {
+
+               for(int i = start; i < end; i++){
+               
+                       itBest = bestfit.find(querySeqs[i]);
+                       Sequence* query;
+                       Sequence* subject;
+                       
+                       if (itBest != bestfit.end()) {
+                               query = itBest->first;
+                               subject = itBest->second;
+                       }else{ mothurOut("Error in calcObserved"); mothurOutEndLine(); }
+                       
+                       vector<Sequence*> queryFragment;
+                       vector<Sequence*> subjectFragment;
+                       
+                       int startpoint = 0; 
+                       for (int m = 0; m < iters; m++) {
+                               string seqFrag = query->getAligned().substr(startpoint, window);
+                               Sequence* temp = new Sequence(query->getName(), seqFrag);
+                               queryFragment.push_back(temp);
+                               
+                               string seqFragsub = subject->getAligned().substr(startpoint, window);
+                               Sequence* tempsub = new Sequence(subject->getName(), seqFragsub);
+                               subjectFragment.push_back(tempsub);
+                               
+                               startpoint += increment;
+                       }
+                       
+                       
+                       for(int j = 0; j < iters; j++){
+                       
+                               distCalculator->calcDist(*(queryFragment[j]), *(subjectFragment[j]));
+                               float dist = distCalculator->getDist();
+                               
+                               //convert to similiarity
+                               //dist = 1 - dist;
+                               
+                               //save oi
+                               obsDistance[query].push_back(dist);
+                       }
+               
+                       //free memory
+                       for (int i = 0; i < queryFragment.size(); i++)  {  delete queryFragment[i];             }
+                       for (int i = 0; i < subjectFragment.size(); i++) {  delete subjectFragment[i];  }
+               }
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "calcObserved");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+void Pintail::calcExpected(int start, int end) {
+       try {
+               
+       
+               //for each sequence
+               for(int i = start; i < end; i++){
+                       
+                       itCoef = seqCoef.find(querySeqs[i]);
+                       float coef = itCoef->second;
+                       
+                       //for each window
+                       vector<float> queryExpected;
+                       for (int m = 0; m < iters; m++) {               
+                               float expected = averageProbability[m] * coef;
+                               queryExpected.push_back(expected);              
+                       }
+                       
+                       expectedDistance[querySeqs[i]] = queryExpected;
+                       
+               }
+                               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "calcExpected");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+void Pintail::calcDE(int start, int end) {
+       try {
+               
+       
+               //for each sequence
+               for(int i = start; i < end; i++){
+                       
+                       itObsDist = obsDistance.find(querySeqs[i]);
+                       vector<float> obs = itObsDist->second;
+                       
+                       itExpDist = expectedDistance.find(querySeqs[i]);
+                       vector<float> exp = itExpDist->second;
+                       
+                       //for each window
+                       float sum = 0.0;  //sum = sum from 1 to m of (oi-ei)^2
+                       for (int m = 0; m < iters; m++) {               sum += ((obs[m] - exp[m]) * (obs[m] - exp[m]));         }
+                       
+                       float de = sqrt((sum / (iters - 1)));
+                       
+                       DE[querySeqs[i]] = de;
+               }
+                               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "calcDE");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+vector<float> Pintail::calcFreq(vector<Sequence*> seqs) {
+       try {
+
+               vector<float> prob;
+               
+               //at each position in the sequence
+               for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
+               
+                       vector<int> freq;   freq.resize(4,0);
+                       
+                       //find the frequency of each nucleotide
+                       for (int j = 0; j < seqs.size(); j++) {
+                               
+                               char value = seqs[j]->getAligned()[i];
+                       
+                               if(toupper(value) == 'A')                                                                       {       freq[0]++;      }
+                               else if(toupper(value) == 'T' || toupper(value) == 'U')         {       freq[1]++;      }
+                               else if(toupper(value) == 'G')                                                          {       freq[2]++;      }
+                               else if(toupper(value) == 'C')                                                          {       freq[3]++;      }
+                       }
+                       
+                       //find base with highest frequency
+                       int highest = 0;
+                       for (int m = 0; m < freq.size(); m++) {    if (freq[m] > highest) {  highest = freq[m];  }              }
+                               
+                       float highFreq = highest / (float) seqs.size(); 
+                       float Pi;
+                       //if this is not a gap column
+                       if (highFreq != 0) { Pi =  (highFreq - 0.25) / 0.75;  }
+                       else { Pi = 0; }
+                       
+                       prob.push_back(Pi);
+                       
+               }
+               
+               return prob;
+                               
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "calcFreq");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+vector<float> Pintail::findQav(vector<float> prob) {
+       try {
+               vector<float> averages;
+               
+               //for each window find average
+               int startpoint = 0;
+               for (int m = 0; m < iters; m++) {
+                       
+                       float average = 0.0;
+                       for (int i = startpoint; i < (startpoint+window); i++) {   average += prob[i];  }
+                       
+                       average = average / window;
+                       
+                       //save this windows average
+                       averages.push_back(average);
+               
+                       startpoint += increment;
+               }
+               
+               return averages;
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "findQav");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+map<Sequence*, float> Pintail::getCoef(vector<float> prob) {
+       try {
+               map<Sequence*, float> coefs;
+               
+               //find average prob
+               float probAverage = 0.0;
+               for (int i = 0; i < prob.size(); i++) {   probAverage += prob[i];       }
+               probAverage = probAverage / (float) prob.size();
+               
+               //find a coef for each sequence
+               map<Sequence*, vector<float> >::iterator it;
+               for (it = obsDistance.begin(); it != obsDistance.end(); it++) {
+                       
+                       vector<float> temp = it->second;
+                       Sequence* tempSeq = it->first;
+                       
+                       //find observed average 
+                       float obsAverage = 0.0;
+                       for (int i = 0; i < temp.size(); i++) {   obsAverage += temp[i];        }
+                       obsAverage = obsAverage / (float) temp.size();
+                       
+                       float coef = obsAverage / probAverage;
+                       
+                       //save this sequences coefficient
+                       coefs[tempSeq] = coef;
+               }
+               
+                                               
+               return coefs;
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "getCoef");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void Pintail::createProcessesPairs() {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               vector<int> processIDS;
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  
+                               process++;
+                       }else if (pid == 0){
+                               findPairs(lines[process]->start, lines[process]->end);
+                               exit(0);
+                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+#else
+               findPairs(lines[0]->start, lines[0]->end);
+
+#endif         
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "createProcessesPairs");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void Pintail::createProcessesObserved() {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               vector<int> processIDS;
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  
+                               process++;
+                       }else if (pid == 0){
+                               calcObserved(lines[process]->start, lines[process]->end);
+                               exit(0);
+                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+#else
+               calcObserved(lines[0]->start, lines[0]->end);
+
+#endif         
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "createProcessesObserved");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+void Pintail::createProcessesExpected() {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               vector<int> processIDS;
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  
+                               process++;
+                       }else if (pid == 0){
+                               calcExpected(lines[process]->start, lines[process]->end);
+                               exit(0);
+                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+#else
+               calcExpected(lines[0]->start, lines[0]->end);
+
+#endif         
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "createProcessesExpected");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
+void Pintail::createProcessesDE() {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 0;
+               vector<int> processIDS;
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  
+                               process++;
+                       }else if (pid == 0){
+                               calcDE(lines[process]->start, lines[process]->end);
+                               exit(0);
+                       }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
+               }
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processors;i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+#else
+               calcDE(lines[0]->start, lines[0]->end);
+
+#endif         
+       }
+       catch(exception& e) {
+               errorOut(e, "Pintail", "createProcessesDE");
+               exit(1);
+       }
+}
+
+//***************************************************************************************************************
+
+
diff --git a/pintail.h b/pintail.h
new file mode 100644 (file)
index 0000000..bae39d7
--- /dev/null
+++ b/pintail.h
@@ -0,0 +1,81 @@
+#ifndef PINTAIL_H
+#define PINTAIL_H
+
+/*
+ *  pintail.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/9/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "chimera.h"
+#include "dist.h"
+
+//This class was created using the algorythms described in the 
+// "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper 
+//by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
+
+/***********************************************************/
+
+class Pintail : public Chimera {
+       
+       public:
+               Pintail(string);        
+               ~Pintail();
+               
+               void getChimeras();
+               void print(ostream&);
+               
+               
+       private:
+       
+               struct linePair {
+                       int start;
+                       int end;
+                       linePair(int i, int j) : start(i), end(j) {}
+               };
+
+       
+               Dist* distCalculator;
+               string fastafile;
+               int iters;
+               vector<linePair*> lines;
+               vector<Sequence*> querySeqs;
+               vector<Sequence*> templateSeqs;
+               
+               map<Sequence*, Sequence*> bestfit;  //maps a query sequence to its most similiar sequence in the template
+               map<Sequence*, Sequence*>::iterator itBest;
+               
+               map<Sequence*, vector<float> > obsDistance;  //maps a query sequence to its observed distance at each window
+               map<Sequence*, vector<float> > expectedDistance;  //maps a query sequence to its expected distance at each window
+               map<Sequence*, vector<float> >::iterator itObsDist;
+               map<Sequence*, vector<float> >::iterator itExpDist;
+               
+               vector<float> averageProbability;                       //Qav
+               map<Sequence*, float> seqCoef;                          //maps a sequence to its coefficient
+               map<Sequence*, float> DE;                                       //maps a sequence to its deviation
+               map<Sequence*, float>::iterator itCoef;         
+               
+               vector<Sequence*> readSeqs(string);
+               vector<float> findQav(vector<float>);
+               vector<float> calcFreq(vector<Sequence*>);
+               map<Sequence*, float> getCoef(vector<float>);
+               
+               void findPairs(int, int);
+               void calcObserved(int, int);
+               void calcExpected(int, int);
+               void calcDE(int, int);
+       
+               void createProcessesPairs();
+               void createProcessesObserved();
+               void createProcessesExpected();
+               void createProcessesDE();
+               
+};
+
+/***********************************************************/
+
+#endif
+
index ce04df11ccd769a233ce30bfdf0554f34377d6cd..7d6247001c645b4b312d0e6599d0c4996fffa44b 100644 (file)
@@ -193,6 +193,19 @@ vector<int>::reverse_iterator RAbundVector::rend(){
        return data.rend();                                     
 }
 
+/***********************************************************************/
+void RAbundVector::nonSortedPrint(ostream& output){
+       try {   
+               output << label << '\t' << numBins << '\t';
+       
+               for(int i=0;i<numBins;i++){             output << data[i] << '\t';              }
+               output << endl;
+       }
+       catch(exception& e) {
+               errorOut(e, "RAbundVector", "nonSortedPrint");
+               exit(1);
+       }
+}
 /***********************************************************************/
 void RAbundVector::print(string prefix, ostream& output){
        try {   
@@ -210,6 +223,7 @@ void RAbundVector::print(string prefix, ostream& output){
        }
 }
 
+
 /***********************************************************************/
 void RAbundVector::print(ostream& output){
        try {
index 62520e6f12ba0530c5fea7c6db4c7f9b52222ea3..fab02a921a602524e7516bdcbd7efc64326cbbea 100644 (file)
@@ -46,6 +46,7 @@ public:
        
        void print(ostream&);
        void print(string, ostream&);
+       void nonSortedPrint(ostream&);
        
        RAbundVector getRAbundVector();
        SAbundVector getSAbundVector();
index 0820b0521961224c04f3b7c6207f4fd3d490c5fd..f341d0a03e3d35177895ab49696f7fb4f1f7ad20 100644 (file)
@@ -19,7 +19,7 @@ SystemCommand::SystemCommand(string option){
                if(option == "help") { help(); abort = true; }
                
                else {
-                       if (option = "") { mothurOut("You must enter a command to run."); mothurOutEndLine(); abort = true; }
+                       if (option == "") { mothurOut("You must enter a command to run."); mothurOutEndLine(); abort = true; }
                        else { command = option; }
                }