finished chimeraslayer method and added get.listcount command

author westcott <westcott>

Mon, 12 Oct 2009 21:26:13 +0000 (21:26 +0000)

committer westcott <westcott>

Mon, 12 Oct 2009 21:26:13 +0000 (21:26 +0000)
author westcott <westcott>
Mon, 12 Oct 2009 21:26:13 +0000 (21:26 +0000)
committer westcott <westcott>
Mon, 12 Oct 2009 21:26:13 +0000 (21:26 +0000)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj

index 6dbf3194aaaeb4d257f81822fc114dfcffa279d0..fa7c67251436f8c7c3997bf2a862de1eb1166bda 100644 (file)
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -164,6 +164,7 @@
                 A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */; };
                 A70DECD91063D8B40057C03C /* secondarystructurecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */; };
                 A7283FF81056CAE100D0CC69 /* chimeracheckrdp.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */; };
+               A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */; };
                 A75B887E104C16860083C454 /* ccode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75B887B104C16860083C454 /* ccode.cpp */; };
                 A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B04492106CC3E60046FC83 /* chimeraslayer.cpp */; };
                 A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7B0450D106CEEC90046FC83 /* slayer.cpp */; };
@@ -524,6 +525,8 @@
                 A70DECD81063D8B40057C03C /* secondarystructurecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = secondarystructurecommand.cpp; sourceTree = SOURCE_ROOT; };
                 A7283FF61056CAE100D0CC69 /* chimeracheckrdp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeracheckrdp.h; sourceTree = SOURCE_ROOT; };
                 A7283FF71056CAE100D0CC69 /* chimeracheckrdp.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckrdp.cpp; sourceTree = SOURCE_ROOT; };
+               A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getlistcountcommand.h; sourceTree = "<group>"; };
+               A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getlistcountcommand.cpp; sourceTree = "<group>"; };
                 A75B887B104C16860083C454 /* ccode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ccode.cpp; sourceTree = SOURCE_ROOT; };
                 A75B887C104C16860083C454 /* ccode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ccode.h; sourceTree = SOURCE_ROOT; };
                 A7B04491106CC3E60046FC83 /* chimeraslayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayer.h; sourceTree = SOURCE_ROOT; };
@@ -845,6 +848,8 @@
                                 A70B53A60F4CD7AD0064797E /* getlabelcommand.cpp */,
                                 A70B53A90F4CD7AD0064797E /* getlinecommand.h */,
                                 A70B53A80F4CD7AD0064797E /* getlinecommand.cpp */,
+                               A73329CD1083B3B3003B10C5 /* getlistcountcommand.h */,
+                               A73329CE1083B3B3003B10C5 /* getlistcountcommand.cpp */,
                                 370B88050F8A4EE4005AB382 /* getoturepcommand.h */,
                                 370B88060F8A4EE4005AB382 /* getoturepcommand.cpp */,
                                 3749273D0FD5956B0031C06B /* getrabundcommand.h */,
@@ -1209,6 +1214,7 @@
                                 A7E4A824106A9AD700688F62 /* maligner.cpp in Sources */,
                                 A7B04493106CC3E60046FC83 /* chimeraslayer.cpp in Sources */,
                                 A7B0450E106CEEC90046FC83 /* slayer.cpp in Sources */,
+                               A73329CF1083B3B3003B10C5 /* getlistcountcommand.cpp in Sources */,
                         );
                         runOnlyForDeploymentPostprocessing = 0;
                 };
diff --git a/aligncommand.cpp b/aligncommand.cpp

index da807bdd5103e5f8f8024217e12b15e4ba1b59ce..48c67ec4569ef65381b087c02323545302097bfe 100644 (file)
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -276,26 +276,27 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
                         if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
                                 alignment->resize(candidateSeq->getUnaligned().length()+1);
                         }
-                       
+       
                         report.setCandidate(candidateSeq);
         
                         Sequence temp = templateDB->findClosestSequence(candidateSeq);
                         Sequence* templateSeq = &temp;
-                       
+       
                         report.setTemplate(templateSeq);
                         report.setSearchParameters(search, templateDB->getSearchScore());
-                       
+                               
                         Nast nast(alignment, candidateSeq, templateSeq);
  
                         report.setAlignmentParameters(align, alignment);
-
+       
                         report.setNastParameters(nast);
  
                         alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
-                       
+                               
                         report.print();
                         
-                       delete candidateSeq;            
+                       delete candidateSeq;
+       
                 }
                 
                 alignmentFile.close();
diff --git a/ccode.h b/ccode.h

index bc1aad8b2b6776c57922ad644e40ef325ea78d4d..de7c7c44724a04bf5b70c7a8336a0e3cd0abdc42 100644 (file)
--- a/ccode.h
+++ b/ccode.h
@@ -14,6 +14,7 @@
  #include "dist.h"
  #include "decalc.h"
  
+/***********************************************************/
  //This class was created using the algorythms described in the 
  // "Evaluating putative chimeric sequences from PCR-amplified products" paper 
  //by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.
diff --git a/chimera.h b/chimera.h

index 2d6ac0b2a7495bbf2408e38b18f52e0c0c330dcc..33e514b0f05d424740f2a660066e1ace6d99d40d 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -76,7 +76,7 @@ class Chimera {
                 virtual void setDivR(float d)                   {       divR = d;                       }
                 virtual void setParents(int p)                  {       parents = p;            }
                 virtual void setMinSim(int s)                   {       minSim = s;                     }
-               virtual void setPrint(int p)                    {       printAll = p;           }
+               virtual void setIters(int i)                    {       iters = i;                      }
  
                 
                 virtual void setCons(string){};
@@ -94,8 +94,8 @@ class Chimera {
                 
         protected:
                 
-               bool filter, correction, svg, printAll;
-               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents;
+               bool filter, correction, svg;
+               int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents, iters;
                 float divR;
                 string seqMask, quanfile, filterString, name;
                         
diff --git a/chimeracheckrdp.h b/chimeracheckrdp.h

index 7563c023d0b774917b25bbec62390b43d180922d..f436718ef8cc7ae5d63451edadd0eee960d3f9fc 100644 (file)
--- a/chimeracheckrdp.h
+++ b/chimeracheckrdp.h
@@ -16,6 +16,7 @@
  #include "kmerdb.hpp"
  #include "database.hpp"
  
+/***********************************************************/
  //This class was created using the algorythms described in 
  //CHIMERA_CHECK version 2.7 written by Niels Larsen. 
  
diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp

index f368869449242ff52fc4c6bce4057a0a17013798..14973ccc3c65d05952783acaf3ba03431e842106 100644 (file)
--- a/chimeraseqscommand.cpp
+++ b/chimeraseqscommand.cpp
@@ -26,7 +26,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                 
                 else {
                         //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "printall" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "iters" };
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -78,9 +78,6 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                         temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
                         correction = isTrue(temp);
                         
-                       temp = validParameter.validFile(parameters, "printall", false);                 if (temp == "not found") { temp = "F"; }
-                       printAll = isTrue(temp);
-                       
                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
                         convert(temp, processors);
                         
@@ -109,6 +106,9 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                         
                         temp = validParameter.validFile(parameters, "parents", false);                  if (temp == "not found") { temp = "5"; }
                         convert(temp, parents); 
+                       
+                       temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
+                       convert(temp, iters); 
                          
                         temp = validParameter.validFile(parameters, "increment", false);                
                         if ((temp == "not found") && ((method == "chimeracheck") || (method == "chimeraslayer"))) { temp = "10"; }
@@ -140,7 +140,7 @@ void ChimeraSeqsCommand::help(){
                 //"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name"
                 //mothurOut("chimera.seqs ASSUMES that your sequences are ALIGNED and if using a template that the template file sequences are the same length as the fasta file sequences.\n\n");
                 mothurOut("The chimera.seqs command reads a fastafile and creates list of potentially chimeric sequences.\n");
-               mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name.\n");
+               mothurOut("The chimera.seqs command parameters are fasta, filter, correction, processors, mask, method, window, increment, template, conservation, quantile, numwanted, ksize, svg, name, iters.\n");
                 mothurOut("The fasta parameter is always required and template is required if using pintail, ccode or chimeracheck.\n");
                 mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
                 mothurOut("The correction parameter allows you to put more emphasis on the distance between highly similar sequences and less emphasis on the differences between remote homologs.\n");
@@ -156,6 +156,7 @@ void ChimeraSeqsCommand::help(){
                 mothurOut("The ksize parameter allows you to input kmersize. \n");
                 mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
                 mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
+               mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
                 mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
                 mothurOut("Details for each method: \n"); 
                 mothurOut("\tpintail: \n"); 
@@ -220,7 +221,7 @@ int ChimeraSeqsCommand::execute(){
                 chimera->setDivR(divR);
                 chimera->setParents(parents);
                 chimera->setMinSim(minSimilarity);
-               chimera->setPrint(printAll);
+               chimera->setIters(iters);
                 
                                 
                 //find chimeras
diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp

index bd1908dd035f6a123ee1f749dfb47579ef5b831d..86ed532490e69f5dd628aa654d85b18c3577f5ec 100644 (file)
--- a/chimeraslayer.cpp
+++ b/chimeraslayer.cpp
@@ -27,118 +27,28 @@ ChimeraSlayer::~ChimeraSlayer() {
  void ChimeraSlayer::print(ostream& out) {
         try {
                 mothurOutEndLine();
+               mothurOut("Only reporting sequence supported by 90% of bootstrapped results.");
+               mothurOutEndLine();
                 
                 for (int i = 0; i < querySeqs.size(); i++) {
                 
                         if (chimeraFlags[i] == "yes") { 
-                               mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
-                       
+                               if ((chimeraResults[i][0].bsa >= 90.0) || (chimeraResults[i][0].bsb >= 90.0)) {
+                                       mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
+                                       out << querySeqs[i]->getName() << "\tyes" << endl;
+                               }else {
+                                       out << querySeqs[i]->getName() << "\tno" << endl;
+                                       mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
+                               }
+
+                               printBlock(chimeraResults[i][0], out, i);
+                               
+                               out << endl;
                         }else{
                                 out << querySeqs[i]->getName() << "\tno" << endl;
-                               mothurOut("no");
+                               mothurOut(querySeqs[i]->getName() + "\tno"); mothurOutEndLine();
                         }
                 }
-/*             
-       
-       my $div_ratio_QLA_QRB = $data_struct->{div_ratio_QLA_QRB};
-       my $div_ratio_QRA_QLB = $data_struct->{div_ratio_QLB_QRA};
-       
-       my $per_id_QLA = $data_struct->{per_id_QLA};
-       my $per_id_QRB = $data_struct->{per_id_QRB};
-       my $per_id_AB = $data_struct->{per_id_AB};
-       my $per_id_QA = $data_struct->{per_id_QA};
-       my $per_id_QB = $data_struct->{per_id_QB}; 
-       my $per_id_LAB = $data_struct->{per_id_LAB};
-       my $per_id_RAB = $data_struct->{per_id_RAB};
-       my $per_id_QRA = $data_struct->{per_id_QRA};
-       my $per_id_QLB = $data_struct->{per_id_QLB};
-       my $per_id_QLB_QRA = $data_struct->{per_id_QLB_QRA};
-       my $per_id_QLA_QRB = $data_struct->{per_id_QLA_QRB};
-       
-       my $win_left_end5 = $data_struct->{win_left_end5};
-       my $win_left_end3 = $data_struct->{win_left_end3};
-       my $win_right_end5 = $data_struct->{win_right_end5};
-       my $win_right_end3 = $data_struct->{win_right_end3};
-       my $Q = $data_struct->{query_alignment};
-       my $A = $data_struct->{parent_A_alignment};
-       my $B = $data_struct->{parent_B_alignment}; 
-       my $BS_A = $data_struct->{BS_A};
-       my $BS_B = $data_struct->{BS_B};
-       
-       my @Q_chars = @{$Q->{align}};
-       my @A_chars = @{$A->{align}};
-       my @B_chars = @{$B->{align}};
-       
-       my $query_acc = $Q->{acc};
-       my $A_acc = $A->{acc};
-       my $B_acc = $B->{acc};
-       
-       my $break_left = $Q->{seqPos}->[$win_left_end3];
-       my $break_right = $Q->{seqPos}->[$win_right_end5];
-       
-       
-       cout << "//\n## CHIMERA\t" << querySeqs[i]->getName() << "\t" << $break_left-$break_right" << endl  
-               << "\tDIV_QLARB: ". sprintf("%.3f", $div_ratio_QLA_QRB)
-               << "\tBS_QLARB: " . sprintf("%.2f", $BS_A)
-               << "\tDIV_QRALB: " . sprintf("%.3f", $div_ratio_QRA_QLB)
-               << "\tBS_QRALB: " . sprintf("%.2f", $BS_B)
-               << "\t$A_acc\t$B_acc" 
-               << "\tbreakpoint: $break_left-$break_right\n\n";
-       
-       ## draw illustration:
-
-       print "            Per_id parents: " . sprintf("%.2f", $per_id_AB) . "\n\n";
-       print "           Per_id(Q,A): " . sprintf("%.2f", $per_id_QA) . "\n";
-       print "--------------------------------------------------- A: $A_acc\n"
-               . " " . sprintf("%.2f", $per_id_QLA) . "                                " . sprintf("%.2f", $per_id_QRA) . "\n"
-               . "~~~~~~~~~~~~~~~~~~~~~~~~\\ /~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
-               . "DivR: " . sprintf("%.3f", $div_ratio_QLA_QRB) . " BS: " . sprintf("%.2f", $BS_A) . "     |\n"
-               . "Per_id(QLA,QRB): " . sprintf("%.2f", $per_id_QLA_QRB) . "   |\n"
-               . "                         |\n"
-               . "   (L-AB: " . sprintf("%.2f", $per_id_LAB) . ")         |      (R-AB: " . sprintf("%.2f", $per_id_RAB) . ")\n"
-               . "   WinL:$win_left_end5-$win_left_end3            |      WinR:$win_right_end5-$win_right_end3\n"
-               . "                         |\n"
-               . "Per_id(QLB,QRA): " . sprintf("%.2f", $per_id_QLB_QRA) . "   |\n"
-               . "DivR: " . sprintf("%.3f", $div_ratio_QRA_QLB) . " BS: " . sprintf("%.2f", $BS_B) . "    |\n"
-               . "~~~~~~~~~~~~~~~~~~~~~~~~/ \\~~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
-               . " " . sprintf("%.2f", $per_id_QLB) . "                                " . sprintf("%.2f", $per_id_QRB) . "\n"
-               . "---------------------------------------------------- B: $B_acc\n";
-       print "            Per_id(Q,B): ". sprintf("%.2f", $per_id_QB) . "\n\n";
-       
-       my $deltaL = $per_id_QLA - $per_id_QLB;
-       my $deltaR = $per_id_QRA - $per_id_QRB;
-
-       print "DeltaL: " . sprintf("%.2f", $deltaL) . "                   DeltaR: " . sprintf("%.2f", $deltaR) . "\n\n";
-       
-       unless ($printAlignmentsFlag) { return; }
-       
-       
-       ## build the left windows:
-       my @Q_left_win = @Q_chars[$win_left_end5..$win_left_end3];
-       my @A_left_win = @A_chars[$win_left_end5..$win_left_end3];
-       my @B_left_win = @B_chars[$win_left_end5..$win_left_end3];
-       
-       &print_alignment($A_acc, \@A_left_win, 
-                                        $query_acc, \@Q_left_win, 
-                                        $B_acc, \@B_left_win);
-       
-       print "\t\t** Breakpoint **\n\n";
-       
-       my @Q_right_win = @Q_chars[$win_right_end5..$win_right_end3];
-       my @A_right_win = @A_chars[$win_right_end5..$win_right_end3];
-       my @B_right_win = @B_chars[$win_right_end5..$win_right_end3];
-       
-       &print_alignment($A_acc, \@A_right_win, 
-                                        $query_acc, \@Q_right_win, 
-                                        $B_acc, \@B_right_win);
-       
-       return;
-}
-
-
-####
-               
-       */      
                                 
         }
         catch(exception& e) {
@@ -161,6 +71,7 @@ void ChimeraSlayer::getChimeras() {
                 
                 chimeraResults.resize(numSeqs);
                 chimeraFlags.resize(numSeqs, "no");
+               spotMap.resize(numSeqs);
                 
                 //break up file if needed
                 int linesPerProcess = numSeqs / processors ;
@@ -183,39 +94,56 @@ void ChimeraSlayer::getChimeras() {
                 
                 if (seqMask != "") {    decalc = new DeCalculator();    } //to use below
                 
+               //initialize spotMap
+               for (int j = 0; j < numSeqs; j++) {
+                       for (int i = 0; i < querySeqs[0]->getAligned().length(); i++) {
+                               spotMap[j][i] = i;
+                       }
+               }
+               
                 //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
                 maligner = new Maligner(templateSeqs, numWanted, match, misMatch, 1.01, minSim);
-               slayer = new Slayer(window, increment, minSim, divR);
+               slayer = new Slayer(window, increment, minSim, divR, iters);
                 
                 for (int i = 0; i < querySeqs.size(); i++) {
                 
                         string chimeraFlag = maligner->getResults(querySeqs[i]);
-                       float percentIdentical = maligner->getPercentID();
+                       //float percentIdentical = maligner->getPercentID();
                         vector<results> Results = maligner->getOutput();
                         
-                       //cout << querySeqs[i]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
+                       cout << "Processing sequence: " << i+1 << endl;
                         
-                       for (int j = 0; j < Results.size(); j++) {
+                       //for (int j = 0; j < Results.size(); j++) {
                                 //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
-                       }
+                       //}
                         
-                       if (chimeraFlag == "yes") {
+                       //if (chimeraFlag == "yes") {
                         
                                 //get sequence that were given from maligner results
                                 vector<SeqDist> seqs;
+                               map<string, float> removeDups;
+                               map<string, float>::iterator itDup;
                                 for (int j = 0; j < Results.size(); j++) {
-                                       Sequence* seq = getSequence(Results[j].parent); //makes copy so you can filter and mask and not effect template
-                                       
-                                       //seq = NULL if error occurred in getSequence
-                                       if (seq == NULL) {  break;      }
-                                       else {  
-                                               SeqDist member;
-                                               member.seq = seq;
-                                               member.dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
-                                               seqs.push_back(member); 
+                                       float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
+                                       //only add if you are not a duplicate
+                                       itDup = removeDups.find(Results[j].parent);
+                                       if (itDup == removeDups.end()) { //this is not duplicate
+                                               removeDups[Results[j].parent] = dist;
+                                       }else if (dist > itDup->second) { //is this a stronger number for this parent
+                                               removeDups[Results[j].parent] = dist;
                                         }
                                 }
-                       
+                               
+                               for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
+                                       Sequence* seq = getSequence(itDup->first); //makes copy so you can filter and mask and not effect template
+                                       
+                                       SeqDist member;
+                                       member.seq = seq;
+                                       member.dist = itDup->second;
+                                       
+                                       seqs.push_back(member);
+                               }
+                               
                                 //limit number of parents to explore - default 5
                                 if (Results.size() > parents) {
                                         //sort by distance
@@ -232,6 +160,24 @@ void ChimeraSlayer::getChimeras() {
                                 //put seqs into vector to send to slayer
                                 vector<Sequence*> seqsForSlayer;
                                 for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+//ofstream out;
+//string name = querySeqs[i]->getName();
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(name.find_first_of("|")+1);
+//cout << name << endl;
+//name = name.substr(0, name.find_last_of("|"));
+//cout << name << endl;
+//string filename = name + ".seqsforslayer";
+//openOutputFile(filename, out);       
+//for (int u = 0; u < seqsForSlayer.size(); u++) { seqsForSlayer[u]->printSequence(out);       }
+//out.close();
+//filename = name + ".fasta";
+//openOutputFile(filename, out);       
+//q->printSequence(out);
+//out.close();
+
                         
                                 //mask then send to slayer...
                                 if (seqMask != "") {
@@ -245,6 +191,10 @@ void ChimeraSlayer::getChimeras() {
                                                 decalc->runMask(seqsForSlayer[k]);
                                         }
                                         
+                                       for (int i = 0; i < numSeqs; i++) {
+                                               spotMap[i] = decalc->getMaskMap();
+                                       }
+
                                 }
                                 
                                 //send to slayer
@@ -253,7 +203,7 @@ void ChimeraSlayer::getChimeras() {
                         
                                 //free memory
                                 for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
-                       }
+                       //}
                         
                 }       
                 //free memory
@@ -296,6 +246,34 @@ Sequence* ChimeraSlayer::getSequence(string name) {
         }
  }
  //***************************************************************************************************************
+void ChimeraSlayer::printBlock(data_struct data, ostream& out, int i){
+       try {
+               
+               out << "parentA: " << data.parentA.getName() << "  parentB: " << data.parentB.getName()  << endl;
+               out << "Left Window: " << spotMap[i][data.winLStart] << " " << spotMap[i][data.winLEnd] << endl;
+               out << "Right Window: " << spotMap[i][data.winRStart] << " " << spotMap[i][data.winREnd] << endl;
+               
+               out << "Divergence of Query to Leftside ParentA and Rightside ParentB: " << data.divr_qla_qrb << '\t' << "Bootstrap: " << data.bsa << endl;
+               out << "Divergence of Query to Rightside ParentA and Leftside ParentB: " << data.divr_qlb_qra << '\t' << "Bootstrap: " << data.bsb << endl;
+               
+               out << "Similarity of parents: " << data.ab << endl;
+               out << "Similarity of query to parentA: " << data.qa << endl;
+               out << "Similarity of query to parentB: " << data.qb << endl;
+               
+               //out << "Per_id(QL,A): " << data.qla << endl;
+               //out << "Per_id(QL,B): " << data.qlb << endl;
+               //out << "Per_id(QR,A): " << data.qra << endl;
+               //out << "Per_id(QR,B): " << data.qrb << endl;
  
+               
+               out << "DeltaL: " << (data.qla - data.qlb) << endl;
+               out << "DeltaR: " << (data.qra - data.qrb) << endl;
  
+       }
+       catch(exception& e) {
+               errorOut(e, "ChimeraSlayer", "printBlock");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
  
diff --git a/chimeraslayer.h b/chimeraslayer.h

index a305b5bfd09601220b15a44fd0ad18ee6b42a09d..379209ef382b47a8203895d85710f35926bf188e 100644 (file)
--- a/chimeraslayer.h
+++ b/chimeraslayer.h
@@ -40,6 +40,7 @@ class ChimeraSlayer : public Chimera {
                 vector<linePair*> lines;
                 vector<Sequence*> querySeqs;
                 vector<Sequence*> templateSeqs;
+               vector< map<int, int> > spotMap;
                 
                 vector< vector<data_struct> > chimeraResults;
                 vector<string> chimeraFlags;
@@ -47,8 +48,7 @@ class ChimeraSlayer : public Chimera {
                 string fastafile, templateFile;
                 
                 Sequence* getSequence(string);  //find sequence from name
-                       
-                               
+               void printBlock(data_struct, ostream&, int i);
  };
  
  /************************************************************************/
diff --git a/commandfactory.cpp b/commandfactory.cpp

index 433702466c5b531c84edb79a45018a589dceaf86..9adf6791505060d7379fbfc387e1ce6f5d9f5afd 100644 (file)
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -56,6 +56,7 @@
  #include "systemcommand.h"
  #include "secondarystructurecommand.h"
  #include "getsharedotucommand.h"
+#include "getlistcountcommand.h"
  
  /***********************************************************/
  
@@ -110,6 +111,7 @@ CommandFactory::CommandFactory(){
         commands["system"]                              = "system";
         commands["align.check"]                 = "align.check";
         commands["get.sharedotu"]               = "get.sharedotu";
+       commands["get.listcount"]               = "get.listcount";
         commands["quit"]                                = "quit"; 
  
  }
@@ -174,6 +176,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                 else if(commandName == "system")                                {       command = new SystemCommand(optionString);                              }
                 else if(commandName == "align.check")                   {       command = new AlignCheckCommand(optionString);                  }
                 else if(commandName == "get.sharedotu")                 {       command = new GetSharedOTUCommand(optionString);                }
+               else if(commandName == "get.listcount")                 {       command = new GetListCountCommand(optionString);                }
                 else                                                                                    {       command = new NoCommand(optionString);                                  }
  
                 return command;
diff --git a/getlistcountcommand.cpp b/getlistcountcommand.cpp

new file mode 100644 (file)

index 0000000..f14e17a
--- /dev/null
+++ b/getlistcountcommand.cpp
@@ -0,0 +1,197 @@
+/*
+ *  getlistcountcommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/12/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "getlistcountcommand.h"
+
+//**********************************************************************************************************************
+GetListCountCommand::GetListCountCommand(string option){
+       try {
+               globaldata = GlobalData::getInstance();
+               abort = false;
+               allLines = 1;
+               labels.clear();
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string AlignArray[] =  {"list","label"};
+                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+               
+                       //check to make sure all parameters are valid for command
+                       for (map<string, string>::iterator it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       string ranRead = globaldata->getListFile();
+                       
+                       //check for required parameters
+                       listfile = validParameter.validFile(parameters, "list", true);
+                       if ((listfile == "not found") && (globaldata->getListFile() == ""))  { mothurOut("You must read a listfile before running the get.listcount command.");  mothurOutEndLine(); abort = true; }
+                       else if ((listfile == "not found") && (globaldata->getListFile() != "")) { listfile = globaldata->getListFile(); }
+                       else if (listfile == "not open") { abort = true; }      
+                       else { globaldata->setListFile(listfile); }
+               
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+                       
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; }
+                       else { 
+                               if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
+                               else { allLines = 1;  }
+                       }
+                       
+                       //if the user has not specified any labels use the ones from read.otu
+                       if ((label == "") && (ranRead != "")) {  
+                               allLines = globaldata->allLines; 
+                               labels = globaldata->labels; 
+                       }
+               }
+       }
+       catch(exception& e) {
+               errorOut(e, "GetListCountCommand", "GetListCountCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void GetListCountCommand::help(){
+       try {
+               mothurOut("The get.listcount command can only be executed after a successful read.otu command of a listfile or providing a list file using the list parameter.\n");
+               mothurOut("The get.listcount command parameters are list and label.  No parameters are required.\n");
+               mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and are separated by dashes.\n");
+               mothurOut("The get.listcount command should be in the following format: get.listcount(list=yourlistFile, label=yourLabels).\n");
+               mothurOut("Example get.listcount(list=amazon.fn.list, label=0.10).\n");
+               mothurOut("The default value for label is all lines in your inputfile.\n");
+               mothurOut("The get.listcount command outputs a .otu file for each distance you specify listing the bin number and the names of the sequences in that bin.\n");
+               mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
+       }
+       catch(exception& e) {
+               errorOut(e, "GetListCountCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+GetListCountCommand::~GetListCountCommand(){}
+
+//**********************************************************************************************************************
+
+int GetListCountCommand::execute(){
+       try {
+               if (abort == true) {    return 0;       }
+
+               globaldata->setFormat("list");
+               
+               //read list file
+               read = new ReadOTUFile(listfile);       
+               read->read(&*globaldata); 
+               
+               input = globaldata->ginput;
+               list = globaldata->gListVector;
+               string lastLabel = list->getLabel();
+
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+
+               while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+                       
+                       if(allLines == 1 || labels.count(list->getLabel()) == 1){
+                       
+                               process(list);
+                                                       
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                       }
+                       
+                       if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               delete list;
+                               list = input->getListVector(lastLabel);
+                               
+                               process(list);
+                                                                                                       
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                       }
+                       
+                       lastLabel = list->getLabel();                   
+                       
+                       delete list;
+                       list = input->getListVector();
+               }
+               
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               mothurOut(". I will use " + lastLabel + "."); mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               mothurOut(". Please refer to " + lastLabel + ".");  mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       if (list != NULL) {             delete list;    }
+                       list = input->getListVector(lastLabel);
+                               
+                       process(list);                  
+                       delete list;  
+               }
+               
+               delete read;
+               globaldata->gListVector = NULL;
+               
+               return 0;
+       }
+       catch(exception& e) {
+               errorOut(e, "GetListCountCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+//return 1 if error, 0 otherwise
+void GetListCountCommand::process(ListVector* list) {
+       try {
+               string binnames, name, sequence;
+               string outputFileName = getRootName(listfile) + list->getLabel() + ".otu";
+               openOutputFile(outputFileName, out);
+               
+               mothurOut(list->getLabel()); mothurOutEndLine();
+               
+               //for each bin in the list vector
+               for (int i = 0; i < list->getNumBins(); i++) {
+                       binnames = list->get(i);
+                       out << i+1 << '\t' << binnames << endl;
+               }
+               
+               out.close();
+       }
+       catch(exception& e) {
+               errorOut(e, "GetListCountCommand", "process");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/getlistcountcommand.h b/getlistcountcommand.h

new file mode 100644 (file)

index 0000000..468ea80
--- /dev/null
+++ b/getlistcountcommand.h
@@ -0,0 +1,44 @@
+#ifndef GETLISTCOUNTCOMMAND_H
+#define GETLISTCOUNTCOMMAND_H
+/*
+ *  getlistcountcommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/12/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "listvector.hpp"
+#include "readotu.h"
+
+class GlobalData;
+
+/**********************************************************/
+
+class GetListCountCommand : public Command {
+       
+public:
+       GetListCountCommand(string);    
+       ~GetListCountCommand();
+       int execute();
+       void help();    
+       
+private:
+       GlobalData* globaldata;
+       ListVector* list;
+       ReadOTUFile* read;
+       InputData* input;
+       
+       bool abort, allLines;
+       set<string> labels; //holds labels to be used
+       string label, listfile;
+       ofstream out;
+       
+       void process(ListVector*);
+};
+/**********************************************************/
+
+#endif
diff --git a/helpcommand.cpp b/helpcommand.cpp

index ba4f53d942b899d4dc02d65c8acc68a63fdd45f7..fc2f291548e07ebc41e03bf121760d23f2c4a16d 100644 (file)
--- a/helpcommand.cpp
+++ b/helpcommand.cpp
@@ -32,7 +32,7 @@ int HelpCommand::execute(){
         
         delete validCommands;
         
-       mothurOutEndLine(); mothurOut("For further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at pschloss@umich.edu.\n");
+       mothurOutEndLine(); mothurOut("For further assistance please refer to the Mothur manual on our wiki at http://www.mothur.org/wiki, or contact Pat Schloss at mothur.bugs@gmail.com.\n");
         return 0;
  }
  
diff --git a/maligner.cpp b/maligner.cpp

index c7f9be02054510ef2d7a995139fab5d273f91809..26ac563032bd93c8f46810d84b1c1a1ceb16c231 100644 (file)
--- a/maligner.cpp
+++ b/maligner.cpp
@@ -16,6 +16,8 @@ Maligner::Maligner(vector<Sequence*> temp, int num, int match, int misMatch, flo
  string Maligner::getResults(Sequence* q) {
         try {
                 
+               outputResults.clear();
+               
                 //make copy so trimming doesn't destroy query from calling class - remember to deallocate
                 query = new Sequence(q->getName(), q->getAligned());
                 
@@ -25,7 +27,7 @@ string Maligner::getResults(Sequence* q) {
                 
                 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
                 refSeqs = decalc->findClosest(query, db, numWanted);
-               
+       
                 refSeqs = minCoverageFilter(refSeqs);
                 
                 if (refSeqs.size() < 2)  { 
diff --git a/mothur.h b/mothur.h

index 836e0660dae78b87fe71e20bc95f59506c81d5dd..84dbdb3dee2879829909b38654c80057d9d5fdb5 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -267,7 +267,7 @@ inline void errorOut(exception& e, string object, string function) {
         
                 mothurOut("Error: ");
                 mothurOut(toString(e.what()));
-               mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at pschloss@umich.edu, and be sure to include the mothur.logFile with your inquiry.");
+               mothurOut(" has occurred in the " + object + " class function " + function + ". Please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
                 mothurOutEndLine();
         
  }
diff --git a/nast.cpp b/nast.cpp

index 294940cd6aabd49f67aca028311d21529f0912ee..f247037cc915fd1d54d9d1e050abde8f73711fba 100644 (file)
--- a/nast.cpp
+++ b/nast.cpp
@@ -226,7 +226,7 @@ void Nast::regapSequences(){        //This is essentially part B in Fig 2. of DeSantis
                         candidateSeq->setAligned(candAln);
                         return;
                 }
-               
+       
                 int fullAlignIndex = 0;
                 int pairwiseAlignIndex = 0;
                 string newTemplateAlign = "";                                   //      this is going to be messy so we want a temporary template
@@ -323,7 +323,9 @@ void Nast::regapSequences(){        //This is essentially part B in Fig 2. of DeSantis
                         newTemplateAlign += tempAln[i];//
                 }
                 
-               int start, end;
+               int start = 0;
+               int end = candAln.length()-1;
+
                 for(int i=0;i<candAln.length();i++){
                         if(candAln[i] == 'Z' || !isalnum(candAln[i]))   {       candAln[i] = '.';       }       //      if we padded the alignemnt from
                         else{                   start = i;                      break;          }                                                       //      blast with Z's, change them to
@@ -338,11 +340,11 @@ void Nast::regapSequences(){      //This is essentially part B in Fig 2. of DeSantis
                         candAln[i] = toupper(candAln[i]);                       //      everything is upper case
                 }
                 
-       
+
                 if(candAln.length() != tempAln.length()){               //      if the regapped candidate sequence is longer than the official
                         removeExtraGaps(candAln, tempAln, newTemplateAlign);//  template alignment then we need to do steps C-F in Fig.
                 }                                                                                               //      2 of Desantis et al.
-                       
+
                 candidateSeq->setAligned(candAln);
         }
         catch(exception& e) {
diff --git a/pintail.h b/pintail.h

index 37efd00b86793f064770784035d5e7dbfb78ee15..aada728413ffbfc9cd3cde1bedb5d07ea492e985 100644 (file)
--- a/pintail.h
+++ b/pintail.h
@@ -14,6 +14,7 @@
  #include "dist.h"
  #include "decalc.h"
  
+/***********************************************************/
  //This class was created using the algorythms described in the 
  // "At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies" paper 
  //by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.
diff --git a/slayer.cpp b/slayer.cpp

index 6e7f0e3b96eb60773d1bd0fdd2e81d84767160b1..95734c74808a3addd63b86ba8151c1f3d7bf039d 100644 (file)
--- a/slayer.cpp
+++ b/slayer.cpp
@@ -10,12 +10,11 @@
  #include "slayer.h"
  
  /***********************************************************************/
-Slayer::Slayer(int win, int increment, int parentThreshold, float div) :
-               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div) {}
+Slayer::Slayer(int win, int increment, int parentThreshold, float div, int i) :
+               windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div), iters(i){}
  /***********************************************************************/
  string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
         try {
-cout << "refSeqs = " << refSeqs.size() << endl;                
                 vector<data_struct> all; all.clear();
                 
                 for (int i = 0; i < refSeqs.size(); i++) {
@@ -46,7 +45,6 @@ cout << "refSeqs = " << refSeqs.size() << endl;
                                                 
                                                 float snpRateLeft = numSNPSLeft / (float) winSizeLeft;
                                                 float snpRateRight = numSNPSRight / (float) winSizeRight;
-                                               
                                                 float logR = log(snpRateLeft / snpRateRight) / log(2);
                                                 
                                                 // do not accept excess snp ratio on either side of the break
@@ -54,7 +52,7 @@ cout << "refSeqs = " << refSeqs.size() << endl;
                                                         
                                                         float BS_A, BS_B;
                                                         bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B);
-                                               
+
                                                         divs[k].bsa = BS_A;
                                                         divs[k].bsb = BS_B;
                                                 
@@ -101,18 +99,17 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
         try{
                 
                 vector<data_struct> data;
-cout << q->getName() << '\t' << q->getAligned().length() << endl;              
+                               
                 //vertical filter
                 vector<Sequence*> temp;
                 temp.push_back(q); temp.push_back(pA); temp.push_back(pB);
-               verticalFilter(temp);
+               map<int, int> spots = verticalFilter(temp);
                 
                 //get these to avoid numerous function calls
                 string query = q->getAligned();
                 string parentA = pA->getAligned();
                 string parentB = pB->getAligned();
                 int length = query.length();
-cout << q->getName() << '\t' << length << endl;
                 
                 //check window size
                 if (length < (2*windowSize+windowStep)) { 
@@ -170,10 +167,10 @@ cout << q->getName() << '\t' << length << endl;
                                         member.rab = RAB; 
                                         member.qra = QRA; 
                                         member.qlb = QLB; 
-                                       member.winLStart = 0;
-                                       member.winLEnd = breakpoint; 
-                                       member.winRStart = breakpoint+1; 
-                                       member.winREnd = length-1; 
+                                       member.winLStart = spots[0];
+                                       member.winLEnd = spots[breakpoint];  //so breakpoint reflects spot in alignment before filter
+                                       member.winRStart = spots[breakpoint+1]; 
+                                       member.winREnd = spots[length-1]; 
                                         member.querySeq = *(q); 
                                         member.parentA = *(pA);
                                         member.parentB = *(pB);
@@ -238,15 +235,16 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                 int numLeft = max(1, int(left.size()/10 +0.5));
                 int numRight = max(1, int(right.size()/10 + 0.5));
                 
-               for (int i = 0; i < 100; i++) {
+               for (int i = 0; i < iters; i++) {
                         //random sampling with replacement.
                 
                         vector<snps> selectedLeft;
+
                         for (int j = 0; j < numLeft; j++) {
                                 int index = int(rand() % left.size());
                                 selectedLeft.push_back(left[index]);
                         }
-               
+
                         vector<snps> selectedRight;
                         for (int j = 0; j < numRight; j++) {
                                 int index = int(rand() % right.size());
@@ -268,7 +266,7 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                 
                         float QLB = snpQB(selectedLeft);
                         float QRB = snpQB(selectedRight);
-                       
+       
                         //in original - not used - not sure why?
                         //float ALB = snpAB(selectedLeft);
                         //float ARB = snpAB(selectedRight);
@@ -283,8 +281,8 @@ void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, fl
                 
                 }
  
-               BSA = (float) count_A;
-               BSB = (float) count_B;
+               BSA = ((float) count_A / (float) iters) * 100;
+               BSB = ((float) count_B / (float) iters) * 100;
         
         }
         catch(exception& e) {
@@ -304,7 +302,7 @@ float Slayer::snpQA(vector<snps> data) {
                         }
                 }
  
-               float percentID = (numIdentical / data.size()) * 100;
+               float percentID = (numIdentical / (float) data.size()) * 100;
                 
                 return percentID;
         }
@@ -325,7 +323,7 @@ float Slayer::snpQB(vector<snps> data) {
                         }
                 }
  
-               float percentID = (numIdentical / data.size()) * 100;
+               float percentID = (numIdentical / (float) data.size()) * 100;
                 
                 return percentID;
  
@@ -346,7 +344,7 @@ float Slayer::snpAB(vector<snps> data) {
                         }
                 }
  
-               float percentID = (numIdentical / data.size()) * 100;
+               float percentID = (numIdentical / (float) data.size()) * 100;
                 
                 return percentID;
  
@@ -380,7 +378,7 @@ float Slayer::computePercentID(string queryFrag, string parent, int left, int ri
  }
  /***********************************************************************/
  //remove columns that contain any gaps
-void Slayer::verticalFilter(vector<Sequence*> seqs) {
+map<int, int> Slayer::verticalFilter(vector<Sequence*> seqs) {
         try {
                 vector<int> gaps;       gaps.resize(seqs[0]->getAligned().length(), 0);
                 
@@ -399,11 +397,17 @@ void Slayer::verticalFilter(vector<Sequence*> seqs) {
                 
                 //zero out spot where all sequences have blanks
                 int numColRemoved = 0;
+               int count = 0;
+               map<int, int> maskMap; maskMap.clear();
  
                 for(int i = 0; i < seqs[0]->getAligned().length(); i++){
                         if(gaps[i] != 0)        {       filterString[i] = '0';  numColRemoved++;  }
+                       else {
+                               maskMap[count] = i;
+                               count++;
+                       }
                 }
-
+               
                 //for each sequence
                 for (int i = 0; i < seqs.size(); i++) {
                 
@@ -417,6 +421,8 @@ void Slayer::verticalFilter(vector<Sequence*> seqs) {
                         
                         seqs[i]->setAligned(newAligned);
                 }
+               
+               return maskMap;
         }
         catch(exception& e) {
                 errorOut(e, "Slayer", "verticalFilter");
diff --git a/slayer.h b/slayer.h

index ea79727561fbfad37dfaf1478276e529b79df22b..dc13e6daa3f1e4ce9e2340da2cf201c9881a3083 100644 (file)
--- a/slayer.h
+++ b/slayer.h
@@ -64,7 +64,7 @@ class Slayer {
  
         public:
                 
-               Slayer(int, int, int, float);
+               Slayer(int, int, int, float, int);
                 ~Slayer() {};
                 
                 string getResults(Sequence*, vector<Sequence*>);
@@ -73,11 +73,11 @@ class Slayer {
                                 
         private:
                 
-               int windowSize, windowStep, parentFragmentThreshold;
+               int windowSize, windowStep, parentFragmentThreshold, iters;
                 float divRThreshold; 
                 vector<data_struct>  outputResults;
                 
-               void verticalFilter(vector<Sequence*>);
+               map<int, int> verticalFilter(vector<Sequence*>);
                 float computePercentID(string, string, int, int);
                 
                 vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*);
author	westcott <westcott>
	Mon, 12 Oct 2009 21:26:13 +0000 (21:26 +0000)
committer	westcott <westcott>
	Mon, 12 Oct 2009 21:26:13 +0000 (21:26 +0000)
Mothur.xcodeproj/project.pbxproj		patch \| blob \| history
aligncommand.cpp		patch \| blob \| history
ccode.h		patch \| blob \| history
chimera.h		patch \| blob \| history
chimeracheckrdp.h		patch \| blob \| history
chimeraseqscommand.cpp		patch \| blob \| history
chimeraslayer.cpp		patch \| blob \| history
chimeraslayer.h		patch \| blob \| history
commandfactory.cpp		patch \| blob \| history
getlistcountcommand.cpp	[new file with mode: 0644]	patch \| blob
getlistcountcommand.h	[new file with mode: 0644]	patch \| blob
helpcommand.cpp		patch \| blob \| history
maligner.cpp		patch \| blob \| history
mothur.h		patch \| blob \| history
nast.cpp		patch \| blob \| history
pintail.h		patch \| blob \| history
slayer.cpp		patch \| blob \| history
slayer.h		patch \| blob \| history