]> git.donarmstrong.com Git - mothur.git/commitdiff
working on chimeraslayer and found bug in shared command.
authorwestcott <westcott>
Tue, 29 Sep 2009 16:05:10 +0000 (16:05 +0000)
committerwestcott <westcott>
Tue, 29 Sep 2009 16:05:10 +0000 (16:05 +0000)
chimera.h
chimeraseqscommand.cpp
chimeraseqscommand.h
chimeraslayer.cpp
chimeraslayer.h
maligner.cpp
sharedcommand.cpp
slayer.cpp
slayer.h

index 3efe0e4b63095042de78f3938118c7ab1d642656..2d6ac0b2a7495bbf2408e38b18f52e0c0c330dcc 100644 (file)
--- a/chimera.h
+++ b/chimera.h
@@ -76,6 +76,7 @@ class Chimera {
                virtual void setDivR(float d)                   {       divR = d;                       }
                virtual void setParents(int p)                  {       parents = p;            }
                virtual void setMinSim(int s)                   {       minSim = s;                     }
+               virtual void setPrint(int p)                    {       printAll = p;           }
 
                
                virtual void setCons(string){};
@@ -93,7 +94,7 @@ class Chimera {
                
        protected:
                
-               bool filter, correction, svg;
+               bool filter, correction, svg, printAll;
                int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents;
                float divR;
                string seqMask, quanfile, filterString, name;
index 925de63c17bf44011e9bb29e1e4b9a7a980fd3ff..f368869449242ff52fc4c6bce4057a0a17013798 100644 (file)
@@ -26,7 +26,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents" };
+                       string Array[] =  {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "printall" };
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -78,6 +78,9 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){
                        temp = validParameter.validFile(parameters, "correction", false);               if (temp == "not found") { temp = "T"; }
                        correction = isTrue(temp);
                        
+                       temp = validParameter.validFile(parameters, "printall", false);                 if (temp == "not found") { temp = "F"; }
+                       printAll = isTrue(temp);
+                       
                        temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
                        convert(temp, processors);
                        
@@ -217,6 +220,7 @@ int ChimeraSeqsCommand::execute(){
                chimera->setDivR(divR);
                chimera->setParents(parents);
                chimera->setMinSim(minSimilarity);
+               chimera->setPrint(printAll);
                
                                
                //find chimeras
index 865a84d378d0429a77e833aa5b1db7eca097a83f..f19a79d230642894a8bbb7ab9d459f43368fd5dd 100644 (file)
@@ -29,7 +29,7 @@ private:
        
        bool abort;
        string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile;
-       bool filter, correction, svg;
+       bool filter, correction, svg, printAll;
        int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity;
        float divR;
        Chimera* chimera;
index 293ee807d5b875728716fddde17cd6c032f318e1..bd1908dd035f6a123ee1f749dfb47579ef5b831d 100644 (file)
@@ -26,9 +26,119 @@ ChimeraSlayer::~ChimeraSlayer() {
 //***************************************************************************************************************
 void ChimeraSlayer::print(ostream& out) {
        try {
-               
                mothurOutEndLine();
                
+               for (int i = 0; i < querySeqs.size(); i++) {
+               
+                       if (chimeraFlags[i] == "yes") { 
+                               mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine();
+                       
+                       }else{
+                               out << querySeqs[i]->getName() << "\tno" << endl;
+                               mothurOut("no");
+                       }
+               }
+/*             
+       
+       my $div_ratio_QLA_QRB = $data_struct->{div_ratio_QLA_QRB};
+       my $div_ratio_QRA_QLB = $data_struct->{div_ratio_QLB_QRA};
+       
+       my $per_id_QLA = $data_struct->{per_id_QLA};
+       my $per_id_QRB = $data_struct->{per_id_QRB};
+       my $per_id_AB = $data_struct->{per_id_AB};
+       my $per_id_QA = $data_struct->{per_id_QA};
+       my $per_id_QB = $data_struct->{per_id_QB}; 
+       my $per_id_LAB = $data_struct->{per_id_LAB};
+       my $per_id_RAB = $data_struct->{per_id_RAB};
+       my $per_id_QRA = $data_struct->{per_id_QRA};
+       my $per_id_QLB = $data_struct->{per_id_QLB};
+       my $per_id_QLB_QRA = $data_struct->{per_id_QLB_QRA};
+       my $per_id_QLA_QRB = $data_struct->{per_id_QLA_QRB};
+       
+       my $win_left_end5 = $data_struct->{win_left_end5};
+       my $win_left_end3 = $data_struct->{win_left_end3};
+       my $win_right_end5 = $data_struct->{win_right_end5};
+       my $win_right_end3 = $data_struct->{win_right_end3};
+       my $Q = $data_struct->{query_alignment};
+       my $A = $data_struct->{parent_A_alignment};
+       my $B = $data_struct->{parent_B_alignment}; 
+       my $BS_A = $data_struct->{BS_A};
+       my $BS_B = $data_struct->{BS_B};
+       
+       my @Q_chars = @{$Q->{align}};
+       my @A_chars = @{$A->{align}};
+       my @B_chars = @{$B->{align}};
+       
+       my $query_acc = $Q->{acc};
+       my $A_acc = $A->{acc};
+       my $B_acc = $B->{acc};
+       
+       my $break_left = $Q->{seqPos}->[$win_left_end3];
+       my $break_right = $Q->{seqPos}->[$win_right_end5];
+       
+       
+       cout << "//\n## CHIMERA\t" << querySeqs[i]->getName() << "\t" << $break_left-$break_right" << endl  
+               << "\tDIV_QLARB: ". sprintf("%.3f", $div_ratio_QLA_QRB)
+               << "\tBS_QLARB: " . sprintf("%.2f", $BS_A)
+               << "\tDIV_QRALB: " . sprintf("%.3f", $div_ratio_QRA_QLB)
+               << "\tBS_QRALB: " . sprintf("%.2f", $BS_B)
+               << "\t$A_acc\t$B_acc" 
+               << "\tbreakpoint: $break_left-$break_right\n\n";
+       
+       ## draw illustration:
+
+       print "            Per_id parents: " . sprintf("%.2f", $per_id_AB) . "\n\n";
+       print "           Per_id(Q,A): " . sprintf("%.2f", $per_id_QA) . "\n";
+       print "--------------------------------------------------- A: $A_acc\n"
+               . " " . sprintf("%.2f", $per_id_QLA) . "                                " . sprintf("%.2f", $per_id_QRA) . "\n"
+               . "~~~~~~~~~~~~~~~~~~~~~~~~\\ /~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
+               . "DivR: " . sprintf("%.3f", $div_ratio_QLA_QRB) . " BS: " . sprintf("%.2f", $BS_A) . "     |\n"
+               . "Per_id(QLA,QRB): " . sprintf("%.2f", $per_id_QLA_QRB) . "   |\n"
+               . "                         |\n"
+               . "   (L-AB: " . sprintf("%.2f", $per_id_LAB) . ")         |      (R-AB: " . sprintf("%.2f", $per_id_RAB) . ")\n"
+               . "   WinL:$win_left_end5-$win_left_end3            |      WinR:$win_right_end5-$win_right_end3\n"
+               . "                         |\n"
+               . "Per_id(QLB,QRA): " . sprintf("%.2f", $per_id_QLB_QRA) . "   |\n"
+               . "DivR: " . sprintf("%.3f", $div_ratio_QRA_QLB) . " BS: " . sprintf("%.2f", $BS_B) . "    |\n"
+               . "~~~~~~~~~~~~~~~~~~~~~~~~/ \\~~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n"
+               . " " . sprintf("%.2f", $per_id_QLB) . "                                " . sprintf("%.2f", $per_id_QRB) . "\n"
+               . "---------------------------------------------------- B: $B_acc\n";
+       print "            Per_id(Q,B): ". sprintf("%.2f", $per_id_QB) . "\n\n";
+       
+       my $deltaL = $per_id_QLA - $per_id_QLB;
+       my $deltaR = $per_id_QRA - $per_id_QRB;
+
+       print "DeltaL: " . sprintf("%.2f", $deltaL) . "                   DeltaR: " . sprintf("%.2f", $deltaR) . "\n\n";
+       
+       unless ($printAlignmentsFlag) { return; }
+       
+       
+       ## build the left windows:
+       my @Q_left_win = @Q_chars[$win_left_end5..$win_left_end3];
+       my @A_left_win = @A_chars[$win_left_end5..$win_left_end3];
+       my @B_left_win = @B_chars[$win_left_end5..$win_left_end3];
+       
+       &print_alignment($A_acc, \@A_left_win, 
+                                        $query_acc, \@Q_left_win, 
+                                        $B_acc, \@B_left_win);
+       
+       print "\t\t** Breakpoint **\n\n";
+       
+       my @Q_right_win = @Q_chars[$win_right_end5..$win_right_end3];
+       my @A_right_win = @A_chars[$win_right_end5..$win_right_end3];
+       my @B_right_win = @B_chars[$win_right_end5..$win_right_end3];
+       
+       &print_alignment($A_acc, \@A_right_win, 
+                                        $query_acc, \@Q_right_win, 
+                                        $B_acc, \@B_right_win);
+       
+       return;
+}
+
+
+####
+               
+       */      
                                
        }
        catch(exception& e) {
@@ -49,6 +159,9 @@ void ChimeraSlayer::getChimeras() {
                
                int numSeqs = querySeqs.size();
                
+               chimeraResults.resize(numSeqs);
+               chimeraFlags.resize(numSeqs, "no");
+               
                //break up file if needed
                int linesPerProcess = numSeqs / processors ;
                
@@ -80,11 +193,10 @@ void ChimeraSlayer::getChimeras() {
                        float percentIdentical = maligner->getPercentID();
                        vector<results> Results = maligner->getOutput();
                        
-                       cout << querySeqs[4]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
+                       //cout << querySeqs[i]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl;
                        
                        for (int j = 0; j < Results.size(); j++) {
-                               cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
-                               
+                               //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl;
                        }
                        
                        if (chimeraFlag == "yes") {
@@ -116,7 +228,7 @@ void ChimeraSlayer::getChimeras() {
                                                seqs.pop_back();        
                                        }
                                }
-                               
+               
                                //put seqs into vector to send to slayer
                                vector<Sequence*> seqsForSlayer;
                                for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
@@ -136,8 +248,9 @@ void ChimeraSlayer::getChimeras() {
                                }
                                
                                //send to slayer
-                               slayer->getResults(querySeqs[i], seqsForSlayer);
-                               
+                               chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer);
+                               chimeraResults[i] = slayer->getOutput();
+                       
                                //free memory
                                for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
                        }
index 617ba7ee9557eb6ad54f65b4aa99f323fd5afd86..a305b5bfd09601220b15a44fd0ad18ee6b42a09d 100644 (file)
@@ -40,6 +40,9 @@ class ChimeraSlayer : public Chimera {
                vector<linePair*> lines;
                vector<Sequence*> querySeqs;
                vector<Sequence*> templateSeqs;
+               
+               vector< vector<data_struct> > chimeraResults;
+               vector<string> chimeraFlags;
                                
                string fastafile, templateFile;
                
index 824a2cc0ce19d3d2a7556c8e58776ea4b2cc9895..c7f9be02054510ef2d7a995139fab5d273f91809 100644 (file)
@@ -26,12 +26,6 @@ string Maligner::getResults(Sequence* q) {
                //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
                refSeqs = decalc->findClosest(query, db, numWanted);
                
-               ofstream out;
-               string outFile = "parentsOf" + query->getName();
-               openOutputFile(outFile, out);
-               for (int i = 0; i < refSeqs.size(); i++) {   refSeqs[i]->printSequence(out);    }
-               out.close();
-               
                refSeqs = minCoverageFilter(refSeqs);
                
                if (refSeqs.size() < 2)  { 
index 2ce9bbcded2ba51409f9d130489953a3753ea438..62d8b3a3cf3f07a42a1d1f77c3ce21d57d526cdb 100644 (file)
@@ -223,10 +223,10 @@ void SharedCommand::createMisMatchFile() {
                        map<string, string> namesInList;
                        
                        //go through listfile and get names
-                       for (int i = 0; i < SharedList->getNumSeqs(); i++) {
+                       for (int i = 0; i < SharedList->getNumBins(); i++) {
                                
                                string names = SharedList->get(i); 
-                               
+               
                                while (names.find_first_of(',') != -1) { 
                                        string name = names.substr(0,names.find_first_of(','));
                                        names = names.substr(names.find_first_of(',')+1, names.length());
index d838a2e86857bbce9a3375b795abe17964cbeaf0..6e7f0e3b96eb60773d1bd0fdd2e81d84767160b1 100644 (file)
 Slayer::Slayer(int win, int increment, int parentThreshold, float div) :
                windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div) {}
 /***********************************************************************/
-void Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
+string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
        try {
+cout << "refSeqs = " << refSeqs.size() << endl;                
+               vector<data_struct> all; all.clear();
                
                for (int i = 0; i < refSeqs.size(); i++) {
                
@@ -27,14 +29,67 @@ void Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                
                                vector<data_struct> divs = runBellerophon(q, leftParent, rightParent);
                                
+                               vector<data_struct> selectedDivs;
+                               for (int k = 0; k < divs.size(); k++) {
+                               
+                                       vector<snps> snpsLeft = getSNPS(divs[k].parentA.getAligned(), divs[k].querySeq.getAligned(), divs[k].parentB.getAligned(), divs[k].winLStart, divs[k].winLEnd);
+                                       vector<snps> snpsRight = getSNPS(divs[k].parentA.getAligned(), divs[k].querySeq.getAligned(), divs[k].parentB.getAligned(), divs[k].winRStart, divs[k].winREnd);
+                                       
+                                       int numSNPSLeft = snpsLeft.size();
+                                       int numSNPSRight = snpsRight.size();
+                                       
+                                       //require at least 3 SNPs on each side of the break
+                                       if ((numSNPSLeft >= 3) && (numSNPSRight >= 3)) {
+                                               
+                                               int winSizeLeft = divs[k].winLEnd - divs[k].winLStart + 1;
+                                               int winSizeRight = divs[k].winREnd - divs[k].winRStart + 1;
+                                               
+                                               float snpRateLeft = numSNPSLeft / (float) winSizeLeft;
+                                               float snpRateRight = numSNPSRight / (float) winSizeRight;
+                                               
+                                               float logR = log(snpRateLeft / snpRateRight) / log(2);
+                                               
+                                               // do not accept excess snp ratio on either side of the break
+                                               if (abs(logR) < 1 ) {  
+                                                       
+                                                       float BS_A, BS_B;
+                                                       bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B);
+                                               
+                                                       divs[k].bsa = BS_A;
+                                                       divs[k].bsb = BS_B;
+                                               
+                                                       divs[k].bsMax = max(BS_A, BS_B);
+                                               
+                                                       divs[k].chimeraMax = max(divs[k].qla_qrb, divs[k].qlb_qra);
+                                               
+                                                       selectedDivs.push_back(divs[k]);
+                                               }
+                                       }
+                               }
+                               
+                               //save selected
+                               for (int m = 0; m < selectedDivs.size(); m++) {  all.push_back(selectedDivs[m]);        }
+                               
                                delete q;
                                delete leftParent;
                                delete rightParent;
                        }
-                       
                }
                
                
+               // compute bootstrap support
+               if (all.size() > 0) {
+                       //sort them
+                       sort(all.begin(), all.end(), compareDataStruct);
+                       reverse(all.begin(), all.end());
+                       
+                       outputResults = all;
+                       return "yes"; 
+               }
+               else {
+                       outputResults = all;
+                       return "no";
+               }
        }
        catch(exception& e) {
                errorOut(e, "Slayer", "getResults");
@@ -42,19 +97,96 @@ void Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
        }
 }
 /***********************************************************************/
-vector<data_struct> Slayer::runBellerophon(Sequence* query, Sequence* parentA, Sequence* parentB) {
+vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* pB) {
        try{
                
                vector<data_struct> data;
-               
+cout << q->getName() << '\t' << q->getAligned().length() << endl;              
                //vertical filter
                vector<Sequence*> temp;
+               temp.push_back(q); temp.push_back(pA); temp.push_back(pB);
                verticalFilter(temp);
                
-               int alignLength = query->getAligned().length();
+               //get these to avoid numerous function calls
+               string query = q->getAligned();
+               string parentA = pA->getAligned();
+               string parentB = pB->getAligned();
+               int length = query.length();
+cout << q->getName() << '\t' << length << endl;
+               
+               //check window size
+               if (length < (2*windowSize+windowStep)) { 
+                       mothurOut("Your window size is too large for " + q->getName() + ". I will make the window size " + toString(length/4) + " which is 1/4 the filtered length."); mothurOutEndLine();      
+                       windowSize = length / 4;
+               }
+               
+               for (int i = windowSize-1; i <= (length - windowSize); i += windowStep) {
+               
+                       int breakpoint = i;
+                       int leftLength = breakpoint + 1;
+                       int rightLength = length - leftLength;
+                       
+                       float QLA = computePercentID(query, parentA, 0, breakpoint);
+                       float QRB = computePercentID(query, parentB, breakpoint+1, length - 1);
+               
+                       float QLB = computePercentID(query, parentB, 0, breakpoint);
+                       float QRA = computePercentID(query, parentA, breakpoint+1, length - 1);
                
+                       float LAB = computePercentID(parentA, parentB, 0, breakpoint);
+                       float RAB = computePercentID(parentA, parentB, breakpoint+1, length - 1);
                
+                       float AB = ((LAB*leftLength) + (RAB*rightLength)) / (float) length;
+                       float QA = ((QLA*leftLength) + (QRA*rightLength)) / (float) length;
+                       float QB = ((QLB*leftLength) + (QRB*rightLength)) / (float) length;
                
+                       float QLA_QRB = ((QLA*leftLength) + (QRB*rightLength)) / (float) length;
+                       float QLB_QRA = ((QLB*leftLength) + (QRA*rightLength)) / (float) length;
+               
+                       //in original and not used
+                       //float avgQA_QB = ((QA*leftLength) + (QB*rightLength)) / (float) length;
+               
+                       float divR_QLA_QRB = min((QLA_QRB/QA), (QLA_QRB/QB));
+               
+                       float divR_QLB_QRA = min((QLB_QRA/QA), (QLB_QRA/QB));
+
+                       //is one of them above the 
+                       if (divR_QLA_QRB >= divRThreshold || divR_QLB_QRA >= divRThreshold) {
+                               
+                               if (((QLA_QRB > QA) && (QLA_QRB > QB) && (QLA >= parentFragmentThreshold) && (QRB >= parentFragmentThreshold))  ||
+                                       ((QLB_QRA > QA) && (QLB_QRA > QB) && (QLB >=parentFragmentThreshold) && (QRA >= parentFragmentThreshold)))  {
+                                       
+                                       data_struct member;
+                                       
+                                       member.divr_qla_qrb = divR_QLA_QRB;
+                                       member.divr_qlb_qra = divR_QLB_QRA;
+                                       member.qla_qrb = QLA_QRB;
+                                       member.qlb_qra = QLB_QRA;
+                                       member.qla = QLA;
+                                       member.qrb = QRB;
+                                       member.ab = AB; 
+                                       member.qa = QA;
+                                       member.qb = QB; 
+                                       member.lab = LAB; 
+                                       member.rab = RAB; 
+                                       member.qra = QRA; 
+                                       member.qlb = QLB; 
+                                       member.winLStart = 0;
+                                       member.winLEnd = breakpoint; 
+                                       member.winRStart = breakpoint+1; 
+                                       member.winREnd = length-1; 
+                                       member.querySeq = *(q); 
+                                       member.parentA = *(pA);
+                                       member.parentB = *(pB);
+                                       member.bsa = 0;
+                                       member.bsb = 0;
+                                       member.bsMax = 0;
+                                       member.chimeraMax = 0;
+                                       
+                                       data.push_back(member);
+                                       
+                               }//if
+                       }//if
+               }//for
                
                return data;
                
@@ -65,6 +197,166 @@ vector<data_struct> Slayer::runBellerophon(Sequence* query, Sequence* parentA, S
        }
 }
 /***********************************************************************/
+vector<snps> Slayer::getSNPS(string parentA, string query, string parentB, int left, int right) {
+       try {
+       
+               vector<snps> data;
+
+               for (int i = left; i <= right; i++) {
+                       
+                       char A = parentA[i];
+                       char Q = query[i];
+                       char B = parentB[i];
+                       
+                       if ((A != Q) || (B != Q)) {
+                               snps member;
+                               member.queryChar = Q;
+                               member.parentAChar = A;
+                               member.parentBChar = B;
+                               
+                               data.push_back(member);
+                       }
+               }
+               
+               return data;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "Slayer", "getSNPS");
+               exit(1);
+       }
+}
+/***********************************************************************/
+void Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, float& BSB) {
+       try {
+
+               srand((unsigned)time( NULL ));
+
+               int count_A = 0; // sceneario QLA,QRB supported
+               int count_B = 0; // sceneario QLB,QRA supported
+       
+               int numLeft = max(1, int(left.size()/10 +0.5));
+               int numRight = max(1, int(right.size()/10 + 0.5));
+               
+               for (int i = 0; i < 100; i++) {
+                       //random sampling with replacement.
+               
+                       vector<snps> selectedLeft;
+                       for (int j = 0; j < numLeft; j++) {
+                               int index = int(rand() % left.size());
+                               selectedLeft.push_back(left[index]);
+                       }
+               
+                       vector<snps> selectedRight;
+                       for (int j = 0; j < numRight; j++) {
+                               int index = int(rand() % right.size());
+                               selectedRight.push_back(right[index]);
+                       }
+               
+                       /* A  ------------------------------------------
+                       #       QLA                     QRA
+                       # Q  ------------------------------------------
+                       #                      |
+                       #                      |
+                       # Q  ------------------------------------------
+                       #       QLB                     QRB
+                       # B  ------------------------------------------ */
+               
+               
+                       float QLA = snpQA(selectedLeft);
+                       float QRA = snpQA(selectedRight);
+               
+                       float QLB = snpQB(selectedLeft);
+                       float QRB = snpQB(selectedRight);
+                       
+                       //in original - not used - not sure why?
+                       //float ALB = snpAB(selectedLeft);
+                       //float ARB = snpAB(selectedRight);
+               
+                       if ((QLA > QLB) && (QRB > QRA)) {
+                               count_A++;
+                       }
+               
+                       if ((QLB > QLA) && (QRA > QRB)) {
+                               count_B++;
+                       }
+               
+               }
+
+               BSA = (float) count_A;
+               BSB = (float) count_B;
+       
+       }
+       catch(exception& e) {
+               errorOut(e, "Slayer", "bootstrapSNPS");
+               exit(1);
+       }
+}
+/***********************************************************************/
+float Slayer::snpQA(vector<snps> data) {
+       try {
+       
+               int numIdentical = 0;
+       
+               for (int i = 0; i < data.size(); i++) {
+                       if (data[i].parentAChar == data[i].queryChar) {
+                               numIdentical++;
+                       }
+               }
+
+               float percentID = (numIdentical / data.size()) * 100;
+               
+               return percentID;
+       }
+       catch(exception& e) {
+               errorOut(e, "Slayer", "snpQA");
+               exit(1);
+       }
+}
+/***********************************************************************/
+float Slayer::snpQB(vector<snps> data) {
+       try {
+       
+               int numIdentical = 0;
+       
+               for (int i = 0; i < data.size(); i++) {
+                       if (data[i].parentBChar == data[i].queryChar) {
+                               numIdentical++;
+                       }
+               }
+
+               float percentID = (numIdentical / data.size()) * 100;
+               
+               return percentID;
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Slayer", "snpQB");
+               exit(1);
+       }
+}
+/***********************************************************************/
+float Slayer::snpAB(vector<snps> data) {
+       try {
+               int numIdentical = 0;
+       
+               for (int i = 0; i < data.size(); i++) {
+                       if (data[i].parentAChar == data[i].parentBChar) {
+                               numIdentical++;
+                       }
+               }
+
+               float percentID = (numIdentical / data.size()) * 100;
+               
+               return percentID;
+
+       }
+       catch(exception& e) {
+               errorOut(e, "Slayer", "snpAB");
+               exit(1);
+       }
+}
+/***********************************************************************/
 float Slayer::computePercentID(string queryFrag, string parent, int left, int right) {
        try {
                int total = 0;
@@ -87,7 +379,7 @@ float Slayer::computePercentID(string queryFrag, string parent, int left, int ri
        }
 }
 /***********************************************************************/
-//this is a vertical filter
+//remove columns that contain any gaps
 void Slayer::verticalFilter(vector<Sequence*> seqs) {
        try {
                vector<int> gaps;       gaps.resize(seqs[0]->getAligned().length(), 0);
@@ -101,16 +393,17 @@ void Slayer::verticalFilter(vector<Sequence*> seqs) {
                        
                        for (int j = 0; j < seqAligned.length(); j++) {
                                //if this spot is a gap
-                               if ((seqAligned[j] == '-') || (seqAligned[j] == '.'))   {       gaps[j]++;      }
+                               if ((seqAligned[j] == '-') || (seqAligned[j] == '.') || (toupper(seqAligned[j]) == 'N'))        {       gaps[j]++;      }
                        }
                }
                
                //zero out spot where all sequences have blanks
                int numColRemoved = 0;
+
                for(int i = 0; i < seqs[0]->getAligned().length(); i++){
-                       if(gaps[i] == seqs.size())      {       filterString[i] = '0';  numColRemoved++;  }
+                       if(gaps[i] != 0)        {       filterString[i] = '0';  numColRemoved++;  }
                }
-               
+
                //for each sequence
                for (int i = 0; i < seqs.size(); i++) {
                
index e898989b17fa73020a367fef11fa1d3b0f688565..ea79727561fbfad37dfaf1478276e529b79df22b 100644 (file)
--- a/slayer.h
+++ b/slayer.h
 /***********************************************************************/
 //This class was modeled after the chimeraSlayer written by the Broad Institute
 /***********************************************************************/
-struct data_struct { //not right needs work...
-       int regionStart;
-       int regionEnd;
-       string parent;
-       float queryToParent;
-       float queryToParentLocal;
-       float divR;
+struct data_struct { //this is crazy big...but follow original.
+       float divr_qla_qrb;
+       float divr_qlb_qra;
+       float qla_qrb;
+       float qlb_qra;
+       float qla;
+       float qrb;
+       float ab; 
+       float qa;
+       float qb; 
+       float lab; 
+       float rab; 
+       float qra; 
+       float qlb; 
+       int winLStart;
+       int winLEnd; 
+       int winRStart; 
+       int winREnd; 
+       Sequence querySeq; 
+       Sequence parentA;
+       Sequence parentB;
+       float bsa;
+       float bsb;
+       float bsMax;
+       float chimeraMax;
+       
 };
 /***********************************************************************/
+//sorts lowest to highest first by bsMax, then if tie by chimeraMax
+inline bool compareDataStruct(data_struct left, data_struct right){
+       if (left.bsMax < right.bsMax) { return true; }
+       else if (left.bsMax == right.bsMax) {
+               return (left.chimeraMax < right.chimeraMax);
+       }else { return false;   }
+} 
+/***********************************************************************/
+struct snps { 
+       char queryChar;
+       char parentAChar;
+       char parentBChar;
+};
+
+/***********************************************************************/
 
 
 class Slayer {
@@ -33,21 +67,25 @@ class Slayer {
                Slayer(int, int, int, float);
                ~Slayer() {};
                
-               void getResults(Sequence*, vector<Sequence*>);
-               //float getPercentID() {        return percentIdenticalQueryChimera;    }
-               //vector<results> getOutput()  {        return outputResults;                   }
+               string getResults(Sequence*, vector<Sequence*>);
+               vector<data_struct> getOutput()  {      return outputResults;                   }
                
                                
        private:
                
                int windowSize, windowStep, parentFragmentThreshold;
                float divRThreshold; 
+               vector<data_struct>  outputResults;
                
                void verticalFilter(vector<Sequence*>);
                float computePercentID(string, string, int, int);
                
                vector<data_struct> runBellerophon(Sequence*, Sequence*, Sequence*);
-               
+               vector<snps> getSNPS(string, string, string, int, int);
+               void bootstrapSNPS(vector<snps>, vector<snps>, float&, float&);
+               float snpQA(vector<snps>);
+               float snpQB(vector<snps>);
+               float snpAB(vector<snps>);
                                
 };