]> git.donarmstrong.com Git - mothur.git/commitdiff
finished chimeraslayer method and added get.listcount command
authorwestcott <westcott>
Mon, 12 Oct 2009 21:26:13 +0000 (21:26 +0000)
committerwestcott <westcott>
Mon, 12 Oct 2009 21:26:13 +0000 (21:26 +0000)
18 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
ccode.h
chimera.h
chimeracheckrdp.h
chimeraseqscommand.cpp
chimeraslayer.cpp
chimeraslayer.h
commandfactory.cpp
getlistcountcommand.cpp [new file with mode: 0644]
getlistcountcommand.h [new file with mode: 0644]
helpcommand.cpp
maligner.cpp
mothur.h
nast.cpp
pintail.h
slayer.cpp
slayer.h

index 6dbf3194aaaeb4d257f81822fc114dfcffa279d0..fa7c67251436f8c7c3997bf2a862de1eb1166bda 100644 (file)
                A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */; };
                A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; };
                A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; };
+               A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */; };
                A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
                A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */; };
                A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0450D106CEEC90046FC83 /* slayer.cpp */; };
                A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = SOURCE_ROOT; };
                A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; };
                A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; };
+               A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlistcountcommand.h; sourceTree = "<group>"; };
+               A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlistcountcommand.cpp; sourceTree = "<group>"; };
                A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
                A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
                A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; };
                                A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */,
                                A70B53A90F4CD7AD0064797E /* getlinecommand.h */,
                                A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */,
+                               A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */,
+                               A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */,
                                370B88050F8A4EE4005AB382 /* getoturepcommand.h */,
                                370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */,
                                3749273D0FD5956B0031C06B /* getrabundcommand.h */,
                                A7E4A824106A9AD700688F62 /* maligner.cpp in Sources */,
                                A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */,
                                A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */,
+                               A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index da807bdd5103e5f8f8024217e12b15e4ba1b59ce..48c67ec4569ef65381b087c02323545302097bfe 100644 (file)
@@ -276,26 +276,27 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
                        if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
                                alignment->resize(candidateSeq->getUnaligned().length()+1);
                        }
-                       
+       
                        report.setCandidate(candidateSeq);
        
                        Sequence temp = templateDB->findClosestSequence(candidateSeq);
                        Sequence* templateSeq = &temp;
-                       
+       
                        report.setTemplate(templateSeq);
                        report.setSearchParameters(search, templateDB->getSearchScore());
-                       
+                               
                        Nast nast(alignment, candidateSeq, templateSeq);
 
                        report.setAlignmentParameters(align, alignment);
-
+       
                        report.setNastParameters(nast);
 
                        alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
-                       
+                               
                        report.print();
                        
-                       delete candidateSeq;            
+                       delete candidateSeq;
+       
                }
                
                alignmentFile.close();
diff --git a/ccode.h b/ccode.h
index bc1aad8b2b6776c57922ad644e40ef325ea78d4d..de7c7c44724a04bf5b70c7a8336a0e3cd0abdc42 100644 (file)
--- a/ccode.h
+++ b/ccode.h
@@ -14,6 +14,7 @@
 #include "dist.h"
 #include "decalc.h"
 
+/***********************************************************/
 //This class was created using the algorythms described in the 
 // "Evaluating putative chimeric sequences from PCR-amplified products" paper 
 //by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.
index 2d6ac0b2a7495bbf2408e38b18f52e0c0c330dcc..33e514b0f05d424740f2a660066e1ace6d99d40d 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -76,7 +76,7 @@ class Chimera {
                virtual void setDivR(float d)                   {       divR = d;                       }
                virtual void setParents(int p)                  {       parents = p;            }
                virtual void setMinSim(int s)                   {       minSim = s;                     }
-               virtual void setPrint(int p)                    {       printAll = p;           }
+               virtual void setIters(int i)                    {       iters = i;                      }
 
                
                virtual void setCons(string){};
@@ -94,8 +94,8 @@ class Chimera {
                
        protected:
                
-               bool filter, correction, svg, printAll;
-               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents;
+               bool filter, correction, svg;
+               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents, iters;
                float divR;
                string seqMask, quanfile, filterString, name;
                        
index 7563c023d0b774917b25bbec62390b43d180922d..f436718ef8cc7ae5d63451edadd0eee960d3f9fc 100644 (file)
@@ -16,6 +16,7 @@
 #include "kmerdb.hpp"
 #include "database.hpp"
 
+/***********************************************************/
 //This class was created using the algorythms described in 
 //CHIMERA_CHECK version 2.7 written by Niels Larsen. 
 
index f368869449242ff52fc4c6bce4057a0a17013798..14973ccc3c65d05952783acaf3ba03431e842106 100644 (file)
@@ -26,7 +26,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "printall" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "iters" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -78,9 +78,6 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
                        correction = isTrue(temp);
                        
-                       temp = validParameter.validFile(parameters, "printall", false);                 if (temp == "not found") { temp = "F"; }
-                       printAll = isTrue(temp);
-                       
                        temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
                        convert(temp, processors);
                        
@@ -109,6 +106,9 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        
                        temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "5"; }
                        convert(temp, parents); 
+                       
+                       temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
+                       convert(temp, iters); 
                         
                        temp = validParameter.validFile(parameters, "increment", false);                
                        if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
@@ -140,7 +140,7 @@ void ChimeraSeqsCommand::help(){
                //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
                //mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
                mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
-               mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name.\n");
+               mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters.\n");
                mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
                mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
                mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
@@ -156,6 +156,7 @@ void ChimeraSeqsCommand::help(){
                mothurOut("The ksize parameter allows you to input kmersize. \n");
                mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
                mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
+               mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
                mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
                mothurOut("Details for each method: \n"); 
                mothurOut("\tpintail: \n"); 
@@ -220,7 +221,7 @@ int ChimeraSeqsCommand::execute(){
                chimera->setDivR(divR);
                chimera->setParents(parents);
                chimera->setMinSim(minSimilarity);
-               chimera->setPrint(printAll);
+               chimera->setIters(iters);
                
                                
                //find chimeras
index bd1908dd035f6a123ee1f749dfb47579ef5b831d..86ed532490e69f5dd628aa654d85b18c3577f5ec 100644 (file)
@@ -27,118 +27,28 @@ ChimeraSlayer::~ChimeraSlayer() {
 void ChimeraSlayer::print(ostream& out) {
        try {
                mothurOutEndLine();
+               mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
+               mothurOutEndLine();
                
                for (int i = 0; i < querySeqs.size(); i++) {
                
                        if (chimeraFlags[i] == "yes") { 
-                               mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
-                       
+                               if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
+                                       mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
+                                       out << querySeqs[i]->getName() << "\tyes" << endl;
+                               }else {
+                                       out << querySeqs[i]->getName() << "\tno" << endl;
+                                       mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
+                               }
+
+                               printBlock(chimeraResults[i][0], out, i);
+                               
+                               out << endl;
                        }else{
                                out << querySeqs[i]->getName() << "\tno" << endl;
-                               mothurOut("no");
+                               mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
                        }
                }
-/*             
-       
-       my $div_ratio_QLA_QRB = $data_struct->{div_ratio_QLA_QRB};
-       my $div_ratio_QRA_QLB = $data_struct->{div_ratio_QLB_QRA};
-       
-       my $per_id_QLA = $data_struct->{per_id_QLA};
-       my $per_id_QRB = $data_struct->{per_id_QRB};
-       my $per_id_AB = $data_struct->{per_id_AB};
-       my $per_id_QA = $data_struct->{per_id_QA};
-       my $per_id_QB = $data_struct->{per_id_QB}; 
-       my $per_id_LAB = $data_struct->{per_id_LAB};
-       my $per_id_RAB = $data_struct->{per_id_RAB};
-       my $per_id_QRA = $data_struct->{per_id_QRA};
-       my $per_id_QLB = $data_struct->{per_id_QLB};
-       my $per_id_QLB_QRA = $data_struct->{per_id_QLB_QRA};
-       my $per_id_QLA_QRB = $data_struct->{per_id_QLA_QRB};
-       
-       my $win_left_end5 = $data_struct->{win_left_end5};
-       my $win_left_end3 = $data_struct->{win_left_end3};
-       my $win_right_end5 = $data_struct->{win_right_end5};
-       my $win_right_end3 = $data_struct->{win_right_end3};
-       my $Q = $data_struct->{query_alignment};
-       my $A = $data_struct->{parent_A_alignment};
-       my $B = $data_struct->{parent_B_alignment}; 
-       my $BS_A = $data_struct->{BS_A};
-       my $BS_B = $data_struct->{BS_B};
-       
-       my @Q_chars = @{$Q->{align}};
-       my @A_chars = @{$A->{align}};
-       my @B_chars = @{$B->{align}};
-       
-       my $query_acc = $Q->{acc};
-       my $A_acc = $A->{acc};
-       my $B_acc = $B->{acc};
-       
-       my $break_left = $Q->{seqPos}->[$win_left_end3];
-       my $break_right = $Q->{seqPos}->[$win_right_end5];
-       
-       
-       cout << "//\n## CHIMERA\t" << querySeqs[i]->getName() << "\t" << $break_left-$break_right" << endl  
-               << "\tDIV_QLARB: ". sprintf("%.3f", $div_ratio_QLA_QRB)
-               << "\tBS_QLARB: " . sprintf("%.2f", $BS_A)
-               << "\tDIV_QRALB: " . sprintf("%.3f", $div_ratio_QRA_QLB)
-               << "\tBS_QRALB: " . sprintf("%.2f", $BS_B)
-               << "\t$A_acc\t$B_acc" 
-               << "\tbreakpoint: $break_left-$break_right\n\n";
-       
-       ## draw illustration:
-
-       print "            Per_id parents: " . sprintf("%.2f", $per_id_AB) . "\n\n";
-       print "           Per_id(Q,A): " . sprintf("%.2f", $per_id_QA) . "\n";
-       print "--------------------------------------------------- A: $A_acc\n"
-               . " " . sprintf("%.2f", $per_id_QLA) . "                                " . sprintf("%.2f", $per_id_QRA) . "\n"
-               . "~~~~~~~~~~~~~~~~~~~~~~~~\\ /~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
-               . "DivR: " . sprintf("%.3f", $div_ratio_QLA_QRB) . " BS: " . sprintf("%.2f", $BS_A) . "     |\n"
-               . "Per_id(QLA,QRB): " . sprintf("%.2f", $per_id_QLA_QRB) . "   |\n"
-               . "                         |\n"
-               . "   (L-AB: " . sprintf("%.2f", $per_id_LAB) . ")         |      (R-AB: " . sprintf("%.2f", $per_id_RAB) . ")\n"
-               . "   WinL:$win_left_end5-$win_left_end3            |      WinR:$win_right_end5-$win_right_end3\n"
-               . "                         |\n"
-               . "Per_id(QLB,QRA): " . sprintf("%.2f", $per_id_QLB_QRA) . "   |\n"
-               . "DivR: " . sprintf("%.3f", $div_ratio_QRA_QLB) . " BS: " . sprintf("%.2f", $BS_B) . "    |\n"
-               . "~~~~~~~~~~~~~~~~~~~~~~~~/ \\~~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
-               . " " . sprintf("%.2f", $per_id_QLB) . "                                " . sprintf("%.2f", $per_id_QRB) . "\n"
-               . "---------------------------------------------------- B: $B_acc\n";
-       print "            Per_id(Q,B): ". sprintf("%.2f", $per_id_QB) . "\n\n";
-       
-       my $deltaL = $per_id_QLA - $per_id_QLB;
-       my $deltaR = $per_id_QRA - $per_id_QRB;
-
-       print "DeltaL: " . sprintf("%.2f", $deltaL) . "                   DeltaR: " . sprintf("%.2f", $deltaR) . "\n\n";
-       
-       unless ($printAlignmentsFlag) { return; }
-       
-       
-       ## build the left windows:
-       my @Q_left_win = @Q_chars[$win_left_end5..$win_left_end3];
-       my @A_left_win = @A_chars[$win_left_end5..$win_left_end3];
-       my @B_left_win = @B_chars[$win_left_end5..$win_left_end3];
-       
-       &print_alignment($A_acc, \@A_left_win, 
-                                        $query_acc, \@Q_left_win, 
-                                        $B_acc, \@B_left_win);
-       
-       print "\t\t** Breakpoint **\n\n";
-       
-       my @Q_right_win = @Q_chars[$win_right_end5..$win_right_end3];
-       my @A_right_win = @A_chars[$win_right_end5..$win_right_end3];
-       my @B_right_win = @B_chars[$win_right_end5..$win_right_end3];
-       
-       &print_alignment($A_acc, \@A_right_win, 
-                                        $query_acc, \@Q_right_win, 
-                                        $B_acc, \@B_right_win);
-       
-       return;
-}
-
-
-####
-               
-       */      
                                
        }
        catch(exception& e) {
@@ -161,6 +71,7 @@ void ChimeraSlayer::getChimeras() {
                
                chimeraResults.resize(numSeqs);
                chimeraFlags.resize(numSeqs, "no");
+               spotMap.resize(numSeqs);
                
                //break up file if needed
                int linesPerProcess = numSeqs / processors ;
@@ -183,39 +94,56 @@ void ChimeraSlayer::getChimeras() {
                
                if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
                
+               //initialize spotMap
+               for (int j = 0; j < numSeqs; j++) {
+                       for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
+                               spotMap[j][i] = i;
+                       }
+               }
+               
                //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
                maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
-               slayer = new Slayer(window, increment, minSim, divR);
+               slayer = new Slayer(window, increment, minSim, divR, iters);
                
                for (int i = 0; i < querySeqs.size(); i++) {
                
                        string chimeraFlag = maligner->getResults(querySeqs[i]);
-                       float percentIdentical = maligner->getPercentID();
+                       //float percentIdentical = maligner->getPercentID();
                        vector<results> Results = maligner->getOutput();
                        
-                       //cout << querySeqs[i]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
+                       cout << "Processing sequence: " << i+1 << endl;
                        
-                       for (int j = 0; j < Results.size(); j++) {
+                       //for (int j = 0; j < Results.size(); j++) {
                                //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
-                       }
+                       //}
                        
-                       if (chimeraFlag == "yes") {
+                       //if (chimeraFlag == "yes") {
                        
                                //get sequence that were given from maligner results
                                vector<SeqDist> seqs;
+                               map<string, float> removeDups;
+                               map<string, float>::iterator itDup;
                                for (int j = 0; j < Results.size(); j++) {
-                                       Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template
-                                       
-                                       //seq = NULL if error occurred in getSequence
-                                       if (seq == NULL) {  break;      }
-                                       else {  
-                                               SeqDist member;
-                                               member.seq = seq;
-                                               member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
-                                               seqs.push_back(member); 
+                                       float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
+                                       //only add if you are not a duplicate
+                                       itDup = removeDups.find(Results[j].parent);
+                                       if (itDup == removeDups.end()) { //this is not duplicate
+                                               removeDups[Results[j].parent] = dist;
+                                       }else if (dist > itDup->second) { //is this a stronger number for this parent
+                                               removeDups[Results[j].parent] = dist;
                                        }
                                }
-                       
+                               
+                               for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
+                                       Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
+                                       
+                                       SeqDist member;
+                                       member.seq = seq;
+                                       member.dist = itDup->second;
+                                       
+                                       seqs.push_back(member);
+                               }
+                               
                                //limit number of parents to explore - default 5
                                if (Results.size() > parents) {
                                        //sort by distance
@@ -232,6 +160,24 @@ void ChimeraSlayer::getChimeras() {
                                //put seqs into vector to send to slayer
                                vector<Sequence*> seqsForSlayer;
                                for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+//ofstream out;
+//string name = querySeqs[i]->getName();
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(0, name.find_last_of("|"));
+//cout << name << endl;
+//string filename = name + ".seqsforslayer";
+//openOutputFile(filename, out);       
+//for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out);       }
+//out.close();
+//filename = name + ".fasta";
+//openOutputFile(filename, out);       
+//q->printSequence(out);
+//out.close();
+
                        
                                //mask then send to slayer...
                                if (seqMask != "") {
@@ -245,6 +191,10 @@ void ChimeraSlayer::getChimeras() {
                                                decalc->runMask(seqsForSlayer[k]);
                                        }
                                        
+                                       for (int i = 0; i < numSeqs; i++) {
+                                               spotMap[i] = decalc->getMaskMap();
+                                       }
+
                                }
                                
                                //send to slayer
@@ -253,7 +203,7 @@ void ChimeraSlayer::getChimeras() {
                        
                                //free memory
                                for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
-                       }
+                       //}
                        
                }       
                //free memory
@@ -296,6 +246,34 @@ Sequence* ChimeraSlayer::getSequence(string name) {
        }
 }
 //***************************************************************************************************************
+void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
+       try {
+               
+               out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
+               out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
+               out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
+               
+               out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
+               out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
+               
+               out << "Similarity of parents: " << data.ab << endl;
+               out << "Similarity of query to parentA: " << data.qa << endl;
+               out << "Similarity of query to parentB: " << data.qb << endl;
+               
+               //out << "Per_id(QL,A): " << data.qla << endl;
+               //out << "Per_id(QL,B): " << data.qlb << endl;
+               //out << "Per_id(QR,A): " << data.qra << endl;
+               //out << "Per_id(QR,B): " << data.qrb << endl;
 
+               
+               out << "DeltaL: " << (data.qla - data.qlb) << endl;
+               out << "DeltaR: " << (data.qra - data.qrb) << endl;
 
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSlayer", "printBlock");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
 
index a305b5bfd09601220b15a44fd0ad18ee6b42a09d..379209ef382b47a8203895d85710f35926bf188e 100644 (file)
@@ -40,6 +40,7 @@ class ChimeraSlayer : public Chimera {
                vector<linePair*> lines;
                vector<Sequence*> querySeqs;
                vector<Sequence*> templateSeqs;
+               vector< map<int, int> > spotMap;
                
                vector< vector<data_struct> > chimeraResults;
                vector<string> chimeraFlags;
@@ -47,8 +48,7 @@ class ChimeraSlayer : public Chimera {
                string fastafile, templateFile;
                
                Sequence* getSequence(string);  //find sequence from name
-                       
-                               
+               void printBlock(data_struct, ostream&, int i);
 };
 
 /************************************************************************/
index 433702466c5b531c84edb79a45018a589dceaf86..9adf6791505060d7379fbfc387e1ce6f5d9f5afd 100644 (file)
@@ -56,6 +56,7 @@
 #include "systemcommand.h"
 #include "secondarystructurecommand.h"
 #include "getsharedotucommand.h"
+#include "getlistcountcommand.h"
 
 /***********************************************************/
 
@@ -110,6 +111,7 @@ CommandFactory::CommandFactory(){
        commands["system"]                              = "system";
        commands["align.check"]                 = "align.check";
        commands["get.sharedotu"]               = "get.sharedotu";
+       commands["get.listcount"]               = "get.listcount";
        commands["quit"]                                = "quit"; 
 
 }
@@ -174,6 +176,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "system")                                {       command = new SystemCommand(optionString);                              }
                else if(commandName == "align.check")                   {       command = new AlignCheckCommand(optionString);                  }
                else if(commandName == "get.sharedotu")                 {       command = new GetSharedOTUCommand(optionString);                }
+               else if(commandName == "get.listcount")                 {       command = new GetListCountCommand(optionString);                }
                else                                                                                    {       command = new NoCommand(optionString);                                  }
 
                return command;
diff --git a/getlistcountcommand.cpp b/getlistcountcommand.cpp
new file mode 100644 (file)
index 0000000..f14e17a
--- /dev/null
@@ -0,0 +1,197 @@
+/*
+ *  getlistcountcommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/12/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "getlistcountcommand.h"
+
+//**********************************************************************************************************************
+GetListCountCommand::GetListCountCommand(string option){
+       try {
+               globaldata = GlobalData::getInstance();
+               abort = false;
+               allLines = 1;
+               labels.clear();
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string AlignArray[] =  {"list","label"};
+                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+               
+                       //check to make sure all parameters are valid for command
+                       for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       string ranRead = globaldata->getListFile();
+                       
+                       //check for required parameters
+                       listfile = validParameter.validFile(parameters, "list", true);
+                       if ((listfile == "not found") && (globaldata->getListFile() == ""))  { mothurOut("You must read a listfile before running the get.listcount command.");  mothurOutEndLine(); abort = true; }
+                       else if ((listfile == "not found") && (globaldata->getListFile() != "")) { listfile = globaldata->getListFile(); }
+                       else if (listfile == "not open") { abort = true; }      
+                       else { globaldata->setListFile(listfile); }
+               
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+                       
+                       //if the user has not specified any labels use the ones from read.otu
+                       if ((label == "") && (ranRead != "")) {  
+                               allLines = globaldata->allLines; 
+                               labels = globaldata->labels; 
+                       }
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "GetListCountCommand", "GetListCountCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void GetListCountCommand::help(){
+       try {
+               mothurOut("The get.listcount command can only be executed after a successful read.otu command of a listfile or providing a list file using the list parameter.\n");
+               mothurOut("The get.listcount command parameters are list and label.  No parameters are required.\n");
+               mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and are separated by dashes.\n");
+               mothurOut("The get.listcount command should be in the following format: get.listcount(list=yourlistFile, label=yourLabels).\n");
+               mothurOut("Example get.listcount(list=amazon.fn.list, label=0.10).\n");
+               mothurOut("The default value for label is all lines in your inputfile.\n");
+               mothurOut("The get.listcount command outputs a .otu file for each distance you specify listing the bin number and the names of the sequences in that bin.\n");
+               mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
+       }
+       catch(exception& e) {
+               errorOut(e, "GetListCountCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+GetListCountCommand::~GetListCountCommand(){}
+
+//**********************************************************************************************************************
+
+int GetListCountCommand::execute(){
+       try {
+               if (abort == true) {    return 0;       }
+
+               globaldata->setFormat("list");
+               
+               //read list file
+               read = new ReadOTUFile(listfile);       
+               read->read(&*globaldata); 
+               
+               input = globaldata->ginput;
+               list = globaldata->gListVector;
+               string lastLabel = list->getLabel();
+
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+
+               while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                       
+                       if(allLines == 1 || labels.count(list->getLabel()) == 1){
+                       
+                               process(list);
+                                                       
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                       }
+                       
+                       if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               delete list;
+                               list = input->getListVector(lastLabel);
+                               
+                               process(list);
+                                                                                                       
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                       }
+                       
+                       lastLabel = list->getLabel();                   
+                       
+                       delete list;
+                       list = input->getListVector();
+               }
+               
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               mothurOut(". Please refer to " + lastLabel + ".");  mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       if (list != NULL) {             delete list;    }
+                       list = input->getListVector(lastLabel);
+                               
+                       process(list);                  
+                       delete list;  
+               }
+               
+               delete read;
+               globaldata->gListVector = NULL;
+               
+               return 0;
+       }
+       catch(exception& e) {
+               errorOut(e, "GetListCountCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+//return 1 if error, 0 otherwise
+void GetListCountCommand::process(ListVector* list) {
+       try {
+               string binnames, name, sequence;
+               string outputFileName = getRootName(listfile) + list->getLabel() + ".otu";
+               openOutputFile(outputFileName, out);
+               
+               mothurOut(list->getLabel()); mothurOutEndLine();
+               
+               //for each bin in the list vector
+               for (int i = 0; i < list->getNumBins(); i++) {
+                       binnames = list->get(i);
+                       out << i+1 << '\t' << binnames << endl;
+               }
+               
+               out.close();
+       }
+       catch(exception& e) {
+               errorOut(e, "GetListCountCommand", "process");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/getlistcountcommand.h b/getlistcountcommand.h
new file mode 100644 (file)
index 0000000..468ea80
--- /dev/null
@@ -0,0 +1,44 @@
+#ifndef GETLISTCOUNTCOMMAND_H
+#define GETLISTCOUNTCOMMAND_H
+/*
+ *  getlistcountcommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/12/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "listvector.hpp"
+#include "readotu.h"
+
+class GlobalData;
+
+/**********************************************************/
+
+class GetListCountCommand : public Command {
+       
+public:
+       GetListCountCommand(string);    
+       ~GetListCountCommand();
+       int execute();
+       void help();    
+       
+private:
+       GlobalData* globaldata;
+       ListVector* list;
+       ReadOTUFile* read;
+       InputData* input;
+       
+       bool abort, allLines;
+       set<string> labels; //holds labels to be used
+       string label, listfile;
+       ofstream out;
+       
+       void process(ListVector*);
+};
+/**********************************************************/
+
+#endif
index ba4f53d942b899d4dc02d65c8acc68a63fdd45f7..fc2f291548e07ebc41e03bf121760d23f2c4a16d 100644 (file)
@@ -32,7 +32,7 @@ int HelpCommand::execute(){
        
        delete validCommands;
        
-       mothurOutEndLine(); mothurOut("For further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at pschloss@umich.edu.\n");
+       mothurOutEndLine(); mothurOut("For further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at mothur.bugs@gmail.com.\n");
        return 0;
 }
 
index c7f9be02054510ef2d7a995139fab5d273f91809..26ac563032bd93c8f46810d84b1c1a1ceb16c231 100644 (file)
@@ -16,6 +16,8 @@ Maligner::Maligner(vector<Sequence*> temp, int num, int match, int misMatch, flo
 string Maligner::getResults(Sequence* q) {
        try {
                
+               outputResults.clear();
+               
                //make copy so trimming doesn't destroy query from calling class - remember to deallocate
                query = new Sequence(q->getName(), q->getAligned());
                
@@ -25,7 +27,7 @@ string Maligner::getResults(Sequence* q) {
                
                //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
                refSeqs = decalc->findClosest(query, db, numWanted);
-               
+       
                refSeqs = minCoverageFilter(refSeqs);
                
                if (refSeqs.size() < 2)  { 
index 836e0660dae78b87fe71e20bc95f59506c81d5dd..84dbdb3dee2879829909b38654c80057d9d5fdb5 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -267,7 +267,7 @@ inline void errorOut(exception& e, string object, string function) {
        
                mothurOut("Error: ");
                mothurOut(toString(e.what()));
-               mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at pschloss@umich.edu, and be sure to include the mothur.logFile with your inquiry.");
+               mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
                mothurOutEndLine();
        
 }
index 294940cd6aabd49f67aca028311d21529f0912ee..f247037cc915fd1d54d9d1e050abde8f73711fba 100644 (file)
--- a/nast.cpp
+++ b/nast.cpp
@@ -226,7 +226,7 @@ void Nast::regapSequences(){        //This is essentially part B in Fig 2. of DeSantis
                        candidateSeq->setAligned(candAln);
                        return;
                }
-               
+       
                int fullAlignIndex = 0;
                int pairwiseAlignIndex = 0;
                string newTemplateAlign = "";                                   //      this is going to be messy so we want a temporary template
@@ -323,7 +323,9 @@ void Nast::regapSequences(){        //This is essentially part B in Fig 2. of DeSantis
                        newTemplateAlign += tempAln[i];//
                }
                
-               int start, end;
+               int start = 0;
+               int end = candAln.length()-1;
+
                for(int i=0;i<candAln.length();i++){
                        if(candAln[i] == 'Z' || !isalnum(candAln[i]))   {       candAln[i] = '.';       }       //      if we padded the alignemnt from
                        else{                   start = i;                      break;          }                                                       //      blast with Z's, change them to
@@ -338,11 +340,11 @@ void Nast::regapSequences(){      //This is essentially part B in Fig 2. of DeSantis
                        candAln[i] = toupper(candAln[i]);                       //      everything is upper case
                }
                
-       
+
                if(candAln.length() != tempAln.length()){               //      if the regapped candidate sequence is longer than the official
                        removeExtraGaps(candAln, tempAln, newTemplateAlign);//  template alignment then we need to do steps C-F in Fig.
                }                                                                                               //      2 of Desantis et al.
-                       
+
                candidateSeq->setAligned(candAln);
        }
        catch(exception& e) {
index 37efd00b86793f064770784035d5e7dbfb78ee15..aada728413ffbfc9cd3cde1bedb5d07ea492e985 100644 (file)
--- a/pintail.h
+++ b/pintail.h
@@ -14,6 +14,7 @@
 #include "dist.h"
 #include "decalc.h"
 
+/***********************************************************/
 //This class was created using the algorythms described in the 
 // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper 
 //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
index 6e7f0e3b96eb60773d1bd0fdd2e81d84767160b1..95734c74808a3addd63b86ba8151c1f3d7bf039d 100644 (file)
 #include "slayer.h"
 
 /***********************************************************************/
-Slayer::Slayer(int win, int increment, int parentThreshold, float div) :
-               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div) {}
+Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i) :
+               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i){}
 /***********************************************************************/
 string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
        try {
-cout << "refSeqs = " << refSeqs.size() << endl;                
                vector<data_struct> all; all.clear();
                
                for (int i = 0; i < refSeqs.size(); i++) {
@@ -46,7 +45,6 @@ cout << "refSeqs = " << refSeqs.size() << endl;
                                                
                                                float snpRateLeft = numSNPSLeft / (float) winSizeLeft;
                                                float snpRateRight = numSNPSRight / (float) winSizeRight;
-                                               
                                                float logR = log(snpRateLeft / snpRateRight) / log(2);
                                                
                                                // do not accept excess snp ratio on either side of the break
@@ -54,7 +52,7 @@ cout << "refSeqs = " << refSeqs.size() << endl;
                                                        
                                                        float BS_A, BS_B;
                                                        bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B);
-                                               
+
                                                        divs[k].bsa = BS_A;
                                                        divs[k].bsb = BS_B;
                                                
@@ -101,18 +99,17 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
        try{
                
                vector<data_struct> data;
-cout << q->getName() << '\t' << q->getAligned().length() << endl;              
+                               
                //vertical filter
                vector<Sequence*> temp;
                temp.push_back(q); temp.push_back(pA); temp.push_back(pB);
-               verticalFilter(temp);
+               map<int, int> spots = verticalFilter(temp);
                
                //get these to avoid numerous function calls
                string query = q->getAligned();
                string parentA = pA->getAligned();
                string parentB = pB->getAligned();
                int length = query.length();
-cout << q->getName() << '\t' << length << endl;
                
                //check window size
                if (length < (2*windowSize+windowStep)) { 
@@ -170,10 +167,10 @@ cout << q->getName() << '\t' << length << endl;
                                        member.rab = RAB; 
                                        member.qra = QRA; 
                                        member.qlb = QLB; 
-                                       member.winLStart = 0;
-                                       member.winLEnd = breakpoint; 
-                                       member.winRStart = breakpoint+1
-                                       member.winREnd = length-1
+                                       member.winLStart = spots[0];
+                                       member.winLEnd = spots[breakpoint];  //so breakpoint reflects spot in alignment before filter
+                                       member.winRStart = spots[breakpoint+1]
+                                       member.winREnd = spots[length-1]
                                        member.querySeq = *(q); 
                                        member.parentA = *(pA);
                                        member.parentB = *(pB);
@@ -238,15 +235,16 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                int numLeft = max(1, int(left.size()/10 +0.5));
                int numRight = max(1, int(right.size()/10 + 0.5));
                
-               for (int i = 0; i < 100; i++) {
+               for (int i = 0; i < iters; i++) {
                        //random sampling with replacement.
                
                        vector<snps> selectedLeft;
+
                        for (int j = 0; j < numLeft; j++) {
                                int index = int(rand() % left.size());
                                selectedLeft.push_back(left[index]);
                        }
-               
+
                        vector<snps> selectedRight;
                        for (int j = 0; j < numRight; j++) {
                                int index = int(rand() % right.size());
@@ -268,7 +266,7 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                
                        float QLB = snpQB(selectedLeft);
                        float QRB = snpQB(selectedRight);
-                       
+       
                        //in original - not used - not sure why?
                        //float ALB = snpAB(selectedLeft);
                        //float ARB = snpAB(selectedRight);
@@ -283,8 +281,8 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                
                }
 
-               BSA = (float) count_A;
-               BSB = (float) count_B;
+               BSA = ((float) count_A / (float) iters) * 100;
+               BSB = ((float) count_B / (float) iters) * 100;
        
        }
        catch(exception& e) {
@@ -304,7 +302,7 @@ float Slayer::snpQA(vector<snps> data) {
                        }
                }
 
-               float percentID = (numIdentical / data.size()) * 100;
+               float percentID = (numIdentical / (float) data.size()) * 100;
                
                return percentID;
        }
@@ -325,7 +323,7 @@ float Slayer::snpQB(vector<snps> data) {
                        }
                }
 
-               float percentID = (numIdentical / data.size()) * 100;
+               float percentID = (numIdentical / (float) data.size()) * 100;
                
                return percentID;
 
@@ -346,7 +344,7 @@ float Slayer::snpAB(vector<snps> data) {
                        }
                }
 
-               float percentID = (numIdentical / data.size()) * 100;
+               float percentID = (numIdentical / (float) data.size()) * 100;
                
                return percentID;
 
@@ -380,7 +378,7 @@ float Slayer::computePercentID(string queryFrag, string parent, int left, int ri
 }
 /***********************************************************************/
 //remove columns that contain any gaps
-void Slayer::verticalFilter(vector<Sequence*> seqs) {
+map<int, int> Slayer::verticalFilter(vector<Sequence*> seqs) {
        try {
                vector<int> gaps;       gaps.resize(seqs[0]->getAligned().length(), 0);
                
@@ -399,11 +397,17 @@ void Slayer::verticalFilter(vector<Sequence*> seqs) {
                
                //zero out spot where all sequences have blanks
                int numColRemoved = 0;
+               int count = 0;
+               map<int, int> maskMap; maskMap.clear();
 
                for(int i = 0; i < seqs[0]->getAligned().length(); i++){
                        if(gaps[i] != 0)        {       filterString[i] = '0';  numColRemoved++;  }
+                       else {
+                               maskMap[count] = i;
+                               count++;
+                       }
                }
-
+               
                //for each sequence
                for (int i = 0; i < seqs.size(); i++) {
                
@@ -417,6 +421,8 @@ void Slayer::verticalFilter(vector<Sequence*> seqs) {
                        
                        seqs[i]->setAligned(newAligned);
                }
+               
+               return maskMap;
        }
        catch(exception& e) {
                errorOut(e, "Slayer", "verticalFilter");
index ea79727561fbfad37dfaf1478276e529b79df22b..dc13e6daa3f1e4ce9e2340da2cf201c9881a3083 100644 (file)
--- a/slayer.h
+++ b/slayer.h
@@ -64,7 +64,7 @@ class Slayer {
 
        public:
                
-               Slayer(int, int, int, float);
+               Slayer(int, int, int, float, int);
                ~Slayer() {};
                
                string getResults(Sequence*, vector<Sequence*>);
@@ -73,11 +73,11 @@ class Slayer {
                                
        private:
                
-               int windowSize, windowStep, parentFragmentThreshold;
+               int windowSize, windowStep, parentFragmentThreshold, iters;
                float divRThreshold; 
                vector<data_struct>  outputResults;
                
-               void verticalFilter(vector<Sequence*>);
+               map<int, int> verticalFilter(vector<Sequence*>);
                float computePercentID(string, string, int, int);
                
                vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*);