]> git.donarmstrong.com Git - mothur.git/blobdiff - slayer.cpp
parallelized seq.error and dist.shared added some error checks to libshuff and dist...
[mothur.git] / slayer.cpp
index a244ea42634fc9b31218aceaafd3d76f249d92e8..4987d4170337f6d4e172c395c137be71832759ed 100644 (file)
@@ -17,7 +17,7 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
        try {
                vector<data_struct> all; all.clear();
                myQuery = *query;
-               
+
                for (int i = 0; i < refSeqs.size(); i++) {
                
                        for (int j = i+1; j < refSeqs.size(); j++) {
@@ -28,12 +28,17 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                Sequence* q = new Sequence(query->getName(), query->getAligned());
                                Sequence* leftParent = new Sequence(refSeqs[i]->getName(), refSeqs[i]->getAligned());
                                Sequence* rightParent = new Sequence(refSeqs[j]->getName(), refSeqs[j]->getAligned());
+                               
+                               //cout << q->getName() << endl << q->getAligned() << endl << endl;      
+                               //cout << leftParent->getName() << endl << leftParent->getAligned() << endl << endl;            
+                               //cout << rightParent->getName() << endl << rightParent->getAligned() << endl << endl;  
+                               //cout << " length = " << rightParent->getAligned().length() << endl;
        
                                map<int, int> spots;  //map from spot in original sequence to spot in filtered sequence for query and both parents
                                vector<data_struct> divs = runBellerophon(q, leftParent, rightParent, spots);
        
                                if (m->control_pressed) { delete q; delete leftParent; delete rightParent; return "no"; }
-                                       
+//                             cout << "examining:\t" << refSeqs[i]->getName() << '\t' << refSeqs[j]->getName() << endl;
                                vector<data_struct> selectedDivs;
                                for (int k = 0; k < divs.size(); k++) {
                                        
@@ -45,6 +50,7 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                        int numSNPSLeft = snpsLeft.size();
                                        int numSNPSRight = snpsRight.size();
                                        
+//                                     cout << numSNPSLeft << '\t' << numSNPSRight << endl;
                                        //require at least 4 SNPs on each side of the break
                                        if ((numSNPSLeft >= 4) && (numSNPSRight >= 4)) {
                                                        
@@ -60,16 +66,16 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                                
                                                
                                                //are we within 10 points of the bootstrap cutoff?
-                                               if ((divs[k].bsMax >= (minBS-10)) && (iters < 1000)) {
-                                                       bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B, 1000);
-                                                               
-                                                       if (m->control_pressed) { delete q; delete leftParent; delete rightParent; return "no"; }
-                                                               
-                                                       divs[k].bsa = BS_A;
-                                                       divs[k].bsb = BS_B;
-                                                       divs[k].bsMax = max(BS_A, BS_B);
-                                                       divs[k].chimeraMax = max(divs[k].qla_qrb, divs[k].qlb_qra);
-                                               }
+//                                             if ((divs[k].bsMax >= (minBS-10)) && (iters < 1000)) {
+//                                                     bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B, 1000);
+//                                                             
+//                                                     if (m->control_pressed) { delete q; delete leftParent; delete rightParent; return "no"; }
+//                                                             
+//                                                     divs[k].bsa = BS_A;
+//                                                     divs[k].bsb = BS_B;
+//                                                     divs[k].bsMax = max(BS_A, BS_B);
+//                                                     divs[k].chimeraMax = max(divs[k].qla_qrb, divs[k].qlb_qra);
+//                                             }
                                                
                                                //so results reflect orignal alignment
                                                divs[k].winLStart = spots[divs[k].winLStart];
@@ -80,7 +86,7 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                                                selectedDivs.push_back(divs[k]);
                                        }
                                }
-                               
+
                                //save selected
                                for (int mi = 0; mi < selectedDivs.size(); mi++) {  all.push_back(selectedDivs[mi]);    }
                                
@@ -90,6 +96,7 @@ string Slayer::getResults(Sequence* query, vector<Sequence*> refSeqs) {
                        }
                }
                
+
                // compute bootstrap support
                if (all.size() > 0) {
                        //sort them
@@ -144,16 +151,16 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
                        int breakpoint = i;
                        int leftLength = breakpoint + 1;
                        int rightLength = length - leftLength;
-                       
+                               
                        float QLA = computePercentID(query, parentA, 0, breakpoint);
-                       float QRB = computePercentID(query, parentB, breakpoint+1, length - 1);
+                       float QRB = computePercentID(query, parentB, breakpoint+1, length-1);
                
                        float QLB = computePercentID(query, parentB, 0, breakpoint);
-                       float QRA = computePercentID(query, parentA, breakpoint+1, length - 1);
+                       float QRA = computePercentID(query, parentA, breakpoint+1, length-1);
                
                        float LAB = computePercentID(parentA, parentB, 0, breakpoint);
-                       float RAB = computePercentID(parentA, parentB, breakpoint+1, length - 1);
-               
+                       float RAB = computePercentID(parentA, parentB, breakpoint+1, length-1); 
+                       
                        float AB = ((LAB*leftLength) + (RAB*rightLength)) / (float) length;
                        float QA = ((QLA*leftLength) + (QRA*rightLength)) / (float) length;
                        float QB = ((QLB*leftLength) + (QRB*rightLength)) / (float) length;
@@ -166,10 +173,12 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
                
                        float divR_QLA_QRB = min((QLA_QRB/QA), (QLA_QRB/QB));
                        float divR_QLB_QRA = min((QLB_QRA/QA), (QLB_QRA/QB));
+                       
+                       
                        //cout << q->getName() << '\t';
                        //cout << pA->getName() << '\t';
                        //cout << pB->getName() << '\t';
-                  // cout << "bp: " << breakpoint << " CHIM_TYPE_A\t" << divR_QLA_QRB << "\tQLA: " << QLA << "\tQRB: " << QRB << "\tQLA_QRB: " << QLA_QRB;
+                   //cout << "bp: " << breakpoint << " CHIM_TYPE_A\t" << divR_QLA_QRB << "\tQLA: " << QLA << "\tQRB: " << QRB << "\tQLA_QRB: " << QLA_QRB;
                        //cout << "\tCHIM_TYPE_B\t" << divR_QLB_QRA << "\tQLB: " << QLB << "\tQRA: " << QRA << "\tQLB_QRA: " << QLB_QRA << endl;
 //cout << leftLength << '\t' << rightLength << '\t' << QLA << '\t' << QRB << '\t' << QLB << '\t' << QRA  << '\t' << LAB << '\t' << RAB << '\t' << AB << '\t' << QA << '\t' << QB << '\t' << QLA_QRB << '\t' <<  QLB_QRA <<    endl;                    
 
@@ -200,7 +209,7 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
                                        member.winLEnd = breakpoint;  
                                        member.winRStart = breakpoint+1; 
                                        member.winREnd = length-1; 
-                                       member.querySeq = *(q); 
+                                       member.querySeq = *(q);
                                        member.parentA = *(pA);
                                        member.parentB = *(pB);
                                        member.bsa = 0;
@@ -214,6 +223,7 @@ vector<data_struct> Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence*
                        }//if
                }//for
                
+               
                return data;
                
        }
@@ -227,7 +237,7 @@ vector<snps> Slayer::getSNPS(string parentA, string query, string parentB, int l
        try {
        
                vector<snps> data;
-//cout << left << '\t' << right << endl;
+
                for (int i = left; i <= right; i++) {
                        
                        char A = parentA[i];
@@ -235,33 +245,31 @@ vector<snps> Slayer::getSNPS(string parentA, string query, string parentB, int l
                        char B = parentB[i];
                        
                        if ((A != Q) || (B != Q)) {
-//cout << "not equal " << Q << '\t' << A << '\t' << B << endl;
-                       
+
                                //ensure not neighboring a gap. change to 12/09 release of chimeraSlayer - not sure what this adds, but it eliminates alot of SNPS
+
+                               
                                if (
                                        //did query loose a base here during filter??
                                        ( i == 0 || abs (baseSpots[0][i] - baseSpots[0][i-1]) == 1) &&
-                                       ( i == query.length() || abs (baseSpots[0][i] - baseSpots[0][i+1]) == 1)
+                                       ( i == query.length()-1 || abs (baseSpots[0][i] - baseSpots[0][i+1]) == 1)
                                        &&
                                        //did parentA loose a base here during filter??
                                        ( i == 0 || abs (baseSpots[1][i] - baseSpots[1][i-1]) == 1) &&
-                                       ( i == parentA.length() || abs (baseSpots[1][i] - baseSpots[1][i+1]) == 1) 
+                                       ( i == parentA.length()-1 || abs (baseSpots[1][i] - baseSpots[1][i+1]) == 1) 
                                        &&
                                        //did parentB loose a base here during filter??
                                        ( i == 0 || abs (baseSpots[2][i] - baseSpots[2][i-1]) == 1) &&
-                                       ( i == parentB.length() || abs (baseSpots[2][i] - baseSpots[2][i+1]) == 1)
+                                       ( i == parentB.length()-1 || abs (baseSpots[2][i] - baseSpots[2][i+1]) == 1)
                                        ) 
                                { 
-                               
                                        snps member;
                                        member.queryChar = Q;
                                        member.parentAChar = A;
                                        member.parentBChar = B;
-//cout << "not neighboring a gap " << Q << '\t' << A << '\t' << B << '\t' << baseSpots[0][i] << '\t' << baseSpots[0][i+1] << '\t' << baseSpots[0][i-1] << '\t' << baseSpots[1][i] << '\t' << baseSpots[1][i+1] << '\t' << baseSpots[1][i-1] << '\t' << baseSpots[2][i] << '\t' << baseSpots[2][i+1] << '\t' << baseSpots[2][i-1] << endl;                              
                                        data.push_back(member);
                                }
                        }
-//                     cout << i << '\t' << data.size() << endl;
                }
                
                return data;
@@ -283,7 +291,7 @@ int Slayer::bootstrapSNPS(vector<snps> left, vector<snps> right, float& BSA, flo
        
                int numLeft = max(1, int(left.size() * percentSNPSample/(float)100 + 0.5));
                int numRight = max(1, int(right.size() * percentSNPSample/(float)100 + 0.5));
-               //cout << numLeft << '\t' << numRight << endl;
+
                for (int i = 0; i < numIters; i++) {
                        //random sampling with replacement.
                
@@ -451,6 +459,7 @@ float Slayer::computePercentID(string queryAlign, string chimera, int left, int
                                        }
                                }
                        }
+                       
                }
                
                float numBases = (countA + countB) /(float) 2;
@@ -458,7 +467,7 @@ float Slayer::computePercentID(string queryAlign, string chimera, int left, int
                if (numBases == 0) { return 0; }
                
                float percentIdentical = (numIdentical/(float)numBases) * 100;
-               
+
                return percentIdentical;
                
        }