From: westcott Date: Tue, 29 Sep 2009 16:05:10 +0000 (+0000) Subject: working on chimeraslayer and found bug in shared command. X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=58cc09a375d1e1afceef3b036574ff21394ccc4d working on chimeraslayer and found bug in shared command. --- diff --git a/chimera.h b/chimera.h index 3efe0e4..2d6ac0b 100644 --- a/chimera.h +++ b/chimera.h @@ -76,6 +76,7 @@ class Chimera { virtual void setDivR(float d) { divR = d; } virtual void setParents(int p) { parents = p; } virtual void setMinSim(int s) { minSim = s; } + virtual void setPrint(int p) { printAll = p; } virtual void setCons(string){}; @@ -93,7 +94,7 @@ class Chimera { protected: - bool filter, correction, svg; + bool filter, correction, svg, printAll; int processors, window, increment, numWanted, kmerSize, match, misMatch, minSim, parents; float divR; string seqMask, quanfile, filterString, name; diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp index 925de63..f368869 100644 --- a/chimeraseqscommand.cpp +++ b/chimeraseqscommand.cpp @@ -26,7 +26,7 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){ else { //valid paramters for this command - string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents" }; + string Array[] = {"fasta", "filter", "correction", "processors", "method", "window", "increment", "template", "conservation", "quantile", "mask", "numwanted", "ksize", "svg", "name", "match","mismatch", "divergence", "minsim", "parents", "printall" }; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -78,6 +78,9 @@ ChimeraSeqsCommand::ChimeraSeqsCommand(string option){ temp = validParameter.validFile(parameters, "correction", false); if (temp == "not found") { temp = "T"; } correction = isTrue(temp); + temp = validParameter.validFile(parameters, "printall", false); if (temp == "not found") { temp = "F"; } + printAll = isTrue(temp); + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found") { temp = "1"; } convert(temp, processors); @@ -217,6 +220,7 @@ int ChimeraSeqsCommand::execute(){ chimera->setDivR(divR); chimera->setParents(parents); chimera->setMinSim(minSimilarity); + chimera->setPrint(printAll); //find chimeras diff --git a/chimeraseqscommand.h b/chimeraseqscommand.h index 865a84d..f19a79d 100644 --- a/chimeraseqscommand.h +++ b/chimeraseqscommand.h @@ -29,7 +29,7 @@ private: bool abort; string method, fastafile, templatefile, consfile, quanfile, maskfile, namefile; - bool filter, correction, svg; + bool filter, correction, svg, printAll; int processors, midpoint, averageLeft, averageRight, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity; float divR; Chimera* chimera; diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 293ee80..bd1908d 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -26,9 +26,119 @@ ChimeraSlayer::~ChimeraSlayer() { //*************************************************************************************************************** void ChimeraSlayer::print(ostream& out) { try { - mothurOutEndLine(); + for (int i = 0; i < querySeqs.size(); i++) { + + if (chimeraFlags[i] == "yes") { + mothurOut(querySeqs[i]->getName() + "\tyes"); mothurOutEndLine(); + + }else{ + out << querySeqs[i]->getName() << "\tno" << endl; + mothurOut("no"); + } + } +/* + + my $div_ratio_QLA_QRB = $data_struct->{div_ratio_QLA_QRB}; + my $div_ratio_QRA_QLB = $data_struct->{div_ratio_QLB_QRA}; + + my $per_id_QLA = $data_struct->{per_id_QLA}; + my $per_id_QRB = $data_struct->{per_id_QRB}; + my $per_id_AB = $data_struct->{per_id_AB}; + my $per_id_QA = $data_struct->{per_id_QA}; + my $per_id_QB = $data_struct->{per_id_QB}; + my $per_id_LAB = $data_struct->{per_id_LAB}; + my $per_id_RAB = $data_struct->{per_id_RAB}; + my $per_id_QRA = $data_struct->{per_id_QRA}; + my $per_id_QLB = $data_struct->{per_id_QLB}; + my $per_id_QLB_QRA = $data_struct->{per_id_QLB_QRA}; + my $per_id_QLA_QRB = $data_struct->{per_id_QLA_QRB}; + + my $win_left_end5 = $data_struct->{win_left_end5}; + my $win_left_end3 = $data_struct->{win_left_end3}; + my $win_right_end5 = $data_struct->{win_right_end5}; + my $win_right_end3 = $data_struct->{win_right_end3}; + my $Q = $data_struct->{query_alignment}; + my $A = $data_struct->{parent_A_alignment}; + my $B = $data_struct->{parent_B_alignment}; + my $BS_A = $data_struct->{BS_A}; + my $BS_B = $data_struct->{BS_B}; + + my @Q_chars = @{$Q->{align}}; + my @A_chars = @{$A->{align}}; + my @B_chars = @{$B->{align}}; + + my $query_acc = $Q->{acc}; + my $A_acc = $A->{acc}; + my $B_acc = $B->{acc}; + + my $break_left = $Q->{seqPos}->[$win_left_end3]; + my $break_right = $Q->{seqPos}->[$win_right_end5]; + + + cout << "//\n## CHIMERA\t" << querySeqs[i]->getName() << "\t" << $break_left-$break_right" << endl + << "\tDIV_QLARB: ". sprintf("%.3f", $div_ratio_QLA_QRB) + << "\tBS_QLARB: " . sprintf("%.2f", $BS_A) + << "\tDIV_QRALB: " . sprintf("%.3f", $div_ratio_QRA_QLB) + << "\tBS_QRALB: " . sprintf("%.2f", $BS_B) + << "\t$A_acc\t$B_acc" + << "\tbreakpoint: $break_left-$break_right\n\n"; + + ## draw illustration: + + print " Per_id parents: " . sprintf("%.2f", $per_id_AB) . "\n\n"; + print " Per_id(Q,A): " . sprintf("%.2f", $per_id_QA) . "\n"; + print "--------------------------------------------------- A: $A_acc\n" + . " " . sprintf("%.2f", $per_id_QLA) . " " . sprintf("%.2f", $per_id_QRA) . "\n" + . "~~~~~~~~~~~~~~~~~~~~~~~~\\ /~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n" + . "DivR: " . sprintf("%.3f", $div_ratio_QLA_QRB) . " BS: " . sprintf("%.2f", $BS_A) . " |\n" + . "Per_id(QLA,QRB): " . sprintf("%.2f", $per_id_QLA_QRB) . " |\n" + . " |\n" + . " (L-AB: " . sprintf("%.2f", $per_id_LAB) . ") | (R-AB: " . sprintf("%.2f", $per_id_RAB) . ")\n" + . " WinL:$win_left_end5-$win_left_end3 | WinR:$win_right_end5-$win_right_end3\n" + . " |\n" + . "Per_id(QLB,QRA): " . sprintf("%.2f", $per_id_QLB_QRA) . " |\n" + . "DivR: " . sprintf("%.3f", $div_ratio_QRA_QLB) . " BS: " . sprintf("%.2f", $BS_B) . " |\n" + . "~~~~~~~~~~~~~~~~~~~~~~~~/ \\~~~~~~~~~~~~~~~~~~~~~~~~~ Q: $query_acc\n" + . " " . sprintf("%.2f", $per_id_QLB) . " " . sprintf("%.2f", $per_id_QRB) . "\n" + . "---------------------------------------------------- B: $B_acc\n"; + print " Per_id(Q,B): ". sprintf("%.2f", $per_id_QB) . "\n\n"; + + my $deltaL = $per_id_QLA - $per_id_QLB; + my $deltaR = $per_id_QRA - $per_id_QRB; + + print "DeltaL: " . sprintf("%.2f", $deltaL) . " DeltaR: " . sprintf("%.2f", $deltaR) . "\n\n"; + + unless ($printAlignmentsFlag) { return; } + + + ## build the left windows: + my @Q_left_win = @Q_chars[$win_left_end5..$win_left_end3]; + my @A_left_win = @A_chars[$win_left_end5..$win_left_end3]; + my @B_left_win = @B_chars[$win_left_end5..$win_left_end3]; + + &print_alignment($A_acc, \@A_left_win, + $query_acc, \@Q_left_win, + $B_acc, \@B_left_win); + + print "\t\t** Breakpoint **\n\n"; + + my @Q_right_win = @Q_chars[$win_right_end5..$win_right_end3]; + my @A_right_win = @A_chars[$win_right_end5..$win_right_end3]; + my @B_right_win = @B_chars[$win_right_end5..$win_right_end3]; + + &print_alignment($A_acc, \@A_right_win, + $query_acc, \@Q_right_win, + $B_acc, \@B_right_win); + + return; +} + + +#### + + */ } catch(exception& e) { @@ -49,6 +159,9 @@ void ChimeraSlayer::getChimeras() { int numSeqs = querySeqs.size(); + chimeraResults.resize(numSeqs); + chimeraFlags.resize(numSeqs, "no"); + //break up file if needed int linesPerProcess = numSeqs / processors ; @@ -80,11 +193,10 @@ void ChimeraSlayer::getChimeras() { float percentIdentical = maligner->getPercentID(); vector Results = maligner->getOutput(); - cout << querySeqs[4]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl; + //cout << querySeqs[i]->getName() << '\t' << chimeraFlag << '\t' << percentIdentical << endl; for (int j = 0; j < Results.size(); j++) { - cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl; - + //cout << "regionStart = " << Results[j].regionStart << "\tRegionEnd = " << Results[j].regionEnd << "\tName = " << Results[j].parent << "\tPerQP = " << Results[j].queryToParent << "\tLocalPerQP = " << Results[j].queryToParentLocal << "\tdivR = " << Results[j].divR << endl; } if (chimeraFlag == "yes") { @@ -116,7 +228,7 @@ void ChimeraSlayer::getChimeras() { seqs.pop_back(); } } - + //put seqs into vector to send to slayer vector seqsForSlayer; for (int k = 0; k < seqs.size(); k++) { seqsForSlayer.push_back(seqs[k].seq); } @@ -136,8 +248,9 @@ void ChimeraSlayer::getChimeras() { } //send to slayer - slayer->getResults(querySeqs[i], seqsForSlayer); - + chimeraFlags[i] = slayer->getResults(querySeqs[i], seqsForSlayer); + chimeraResults[i] = slayer->getOutput(); + //free memory for (int k = 0; k < seqs.size(); k++) { delete seqs[k].seq; } } diff --git a/chimeraslayer.h b/chimeraslayer.h index 617ba7e..a305b5b 100644 --- a/chimeraslayer.h +++ b/chimeraslayer.h @@ -40,6 +40,9 @@ class ChimeraSlayer : public Chimera { vector lines; vector querySeqs; vector templateSeqs; + + vector< vector > chimeraResults; + vector chimeraFlags; string fastafile, templateFile; diff --git a/maligner.cpp b/maligner.cpp index 824a2cc..c7f9be0 100644 --- a/maligner.cpp +++ b/maligner.cpp @@ -26,12 +26,6 @@ string Maligner::getResults(Sequence* q) { //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate refSeqs = decalc->findClosest(query, db, numWanted); - ofstream out; - string outFile = "parentsOf" + query->getName(); - openOutputFile(outFile, out); - for (int i = 0; i < refSeqs.size(); i++) { refSeqs[i]->printSequence(out); } - out.close(); - refSeqs = minCoverageFilter(refSeqs); if (refSeqs.size() < 2) { diff --git a/sharedcommand.cpp b/sharedcommand.cpp index 2ce9bbc..62d8b3a 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -223,10 +223,10 @@ void SharedCommand::createMisMatchFile() { map namesInList; //go through listfile and get names - for (int i = 0; i < SharedList->getNumSeqs(); i++) { + for (int i = 0; i < SharedList->getNumBins(); i++) { string names = SharedList->get(i); - + while (names.find_first_of(',') != -1) { string name = names.substr(0,names.find_first_of(',')); names = names.substr(names.find_first_of(',')+1, names.length()); diff --git a/slayer.cpp b/slayer.cpp index d838a2e..6e7f0e3 100644 --- a/slayer.cpp +++ b/slayer.cpp @@ -13,8 +13,10 @@ Slayer::Slayer(int win, int increment, int parentThreshold, float div) : windowSize(win), windowStep(increment), parentFragmentThreshold(parentThreshold), divRThreshold(div) {} /***********************************************************************/ -void Slayer::getResults(Sequence* query, vector refSeqs) { +string Slayer::getResults(Sequence* query, vector refSeqs) { try { +cout << "refSeqs = " << refSeqs.size() << endl; + vector all; all.clear(); for (int i = 0; i < refSeqs.size(); i++) { @@ -27,14 +29,67 @@ void Slayer::getResults(Sequence* query, vector refSeqs) { vector divs = runBellerophon(q, leftParent, rightParent); + vector selectedDivs; + for (int k = 0; k < divs.size(); k++) { + + vector snpsLeft = getSNPS(divs[k].parentA.getAligned(), divs[k].querySeq.getAligned(), divs[k].parentB.getAligned(), divs[k].winLStart, divs[k].winLEnd); + vector snpsRight = getSNPS(divs[k].parentA.getAligned(), divs[k].querySeq.getAligned(), divs[k].parentB.getAligned(), divs[k].winRStart, divs[k].winREnd); + + int numSNPSLeft = snpsLeft.size(); + int numSNPSRight = snpsRight.size(); + + //require at least 3 SNPs on each side of the break + if ((numSNPSLeft >= 3) && (numSNPSRight >= 3)) { + + int winSizeLeft = divs[k].winLEnd - divs[k].winLStart + 1; + int winSizeRight = divs[k].winREnd - divs[k].winRStart + 1; + + float snpRateLeft = numSNPSLeft / (float) winSizeLeft; + float snpRateRight = numSNPSRight / (float) winSizeRight; + + float logR = log(snpRateLeft / snpRateRight) / log(2); + + // do not accept excess snp ratio on either side of the break + if (abs(logR) < 1 ) { + + float BS_A, BS_B; + bootstrapSNPS(snpsLeft, snpsRight, BS_A, BS_B); + + divs[k].bsa = BS_A; + divs[k].bsb = BS_B; + + divs[k].bsMax = max(BS_A, BS_B); + + divs[k].chimeraMax = max(divs[k].qla_qrb, divs[k].qlb_qra); + + selectedDivs.push_back(divs[k]); + } + } + } + + //save selected + for (int m = 0; m < selectedDivs.size(); m++) { all.push_back(selectedDivs[m]); } + delete q; delete leftParent; delete rightParent; } - } + // compute bootstrap support + if (all.size() > 0) { + //sort them + sort(all.begin(), all.end(), compareDataStruct); + reverse(all.begin(), all.end()); + + outputResults = all; + return "yes"; + } + else { + outputResults = all; + return "no"; + } } catch(exception& e) { errorOut(e, "Slayer", "getResults"); @@ -42,19 +97,96 @@ void Slayer::getResults(Sequence* query, vector refSeqs) { } } /***********************************************************************/ -vector Slayer::runBellerophon(Sequence* query, Sequence* parentA, Sequence* parentB) { +vector Slayer::runBellerophon(Sequence* q, Sequence* pA, Sequence* pB) { try{ vector data; - +cout << q->getName() << '\t' << q->getAligned().length() << endl; //vertical filter vector temp; + temp.push_back(q); temp.push_back(pA); temp.push_back(pB); verticalFilter(temp); - int alignLength = query->getAligned().length(); + //get these to avoid numerous function calls + string query = q->getAligned(); + string parentA = pA->getAligned(); + string parentB = pB->getAligned(); + int length = query.length(); +cout << q->getName() << '\t' << length << endl; + + //check window size + if (length < (2*windowSize+windowStep)) { + mothurOut("Your window size is too large for " + q->getName() + ". I will make the window size " + toString(length/4) + " which is 1/4 the filtered length."); mothurOutEndLine(); + windowSize = length / 4; + } + + for (int i = windowSize-1; i <= (length - windowSize); i += windowStep) { + + int breakpoint = i; + int leftLength = breakpoint + 1; + int rightLength = length - leftLength; + + float QLA = computePercentID(query, parentA, 0, breakpoint); + float QRB = computePercentID(query, parentB, breakpoint+1, length - 1); + + float QLB = computePercentID(query, parentB, 0, breakpoint); + float QRA = computePercentID(query, parentA, breakpoint+1, length - 1); + float LAB = computePercentID(parentA, parentB, 0, breakpoint); + float RAB = computePercentID(parentA, parentB, breakpoint+1, length - 1); + float AB = ((LAB*leftLength) + (RAB*rightLength)) / (float) length; + float QA = ((QLA*leftLength) + (QRA*rightLength)) / (float) length; + float QB = ((QLB*leftLength) + (QRB*rightLength)) / (float) length; + float QLA_QRB = ((QLA*leftLength) + (QRB*rightLength)) / (float) length; + float QLB_QRA = ((QLB*leftLength) + (QRA*rightLength)) / (float) length; + + //in original and not used + //float avgQA_QB = ((QA*leftLength) + (QB*rightLength)) / (float) length; + + float divR_QLA_QRB = min((QLA_QRB/QA), (QLA_QRB/QB)); + + float divR_QLB_QRA = min((QLB_QRA/QA), (QLB_QRA/QB)); + + //is one of them above the + if (divR_QLA_QRB >= divRThreshold || divR_QLB_QRA >= divRThreshold) { + + if (((QLA_QRB > QA) && (QLA_QRB > QB) && (QLA >= parentFragmentThreshold) && (QRB >= parentFragmentThreshold)) || + ((QLB_QRA > QA) && (QLB_QRA > QB) && (QLB >=parentFragmentThreshold) && (QRA >= parentFragmentThreshold))) { + + data_struct member; + + member.divr_qla_qrb = divR_QLA_QRB; + member.divr_qlb_qra = divR_QLB_QRA; + member.qla_qrb = QLA_QRB; + member.qlb_qra = QLB_QRA; + member.qla = QLA; + member.qrb = QRB; + member.ab = AB; + member.qa = QA; + member.qb = QB; + member.lab = LAB; + member.rab = RAB; + member.qra = QRA; + member.qlb = QLB; + member.winLStart = 0; + member.winLEnd = breakpoint; + member.winRStart = breakpoint+1; + member.winREnd = length-1; + member.querySeq = *(q); + member.parentA = *(pA); + member.parentB = *(pB); + member.bsa = 0; + member.bsb = 0; + member.bsMax = 0; + member.chimeraMax = 0; + + data.push_back(member); + + }//if + }//if + }//for return data; @@ -65,6 +197,166 @@ vector Slayer::runBellerophon(Sequence* query, Sequence* parentA, S } } /***********************************************************************/ +vector Slayer::getSNPS(string parentA, string query, string parentB, int left, int right) { + try { + + vector data; + + for (int i = left; i <= right; i++) { + + char A = parentA[i]; + char Q = query[i]; + char B = parentB[i]; + + if ((A != Q) || (B != Q)) { + snps member; + member.queryChar = Q; + member.parentAChar = A; + member.parentBChar = B; + + data.push_back(member); + } + } + + return data; + + } + catch(exception& e) { + errorOut(e, "Slayer", "getSNPS"); + exit(1); + } +} +/***********************************************************************/ +void Slayer::bootstrapSNPS(vector left, vector right, float& BSA, float& BSB) { + try { + + srand((unsigned)time( NULL )); + + int count_A = 0; // sceneario QLA,QRB supported + int count_B = 0; // sceneario QLB,QRA supported + + int numLeft = max(1, int(left.size()/10 +0.5)); + int numRight = max(1, int(right.size()/10 + 0.5)); + + for (int i = 0; i < 100; i++) { + //random sampling with replacement. + + vector selectedLeft; + for (int j = 0; j < numLeft; j++) { + int index = int(rand() % left.size()); + selectedLeft.push_back(left[index]); + } + + vector selectedRight; + for (int j = 0; j < numRight; j++) { + int index = int(rand() % right.size()); + selectedRight.push_back(right[index]); + } + + /* A ------------------------------------------ + # QLA QRA + # Q ------------------------------------------ + # | + # | + # Q ------------------------------------------ + # QLB QRB + # B ------------------------------------------ */ + + + float QLA = snpQA(selectedLeft); + float QRA = snpQA(selectedRight); + + float QLB = snpQB(selectedLeft); + float QRB = snpQB(selectedRight); + + //in original - not used - not sure why? + //float ALB = snpAB(selectedLeft); + //float ARB = snpAB(selectedRight); + + if ((QLA > QLB) && (QRB > QRA)) { + count_A++; + } + + if ((QLB > QLA) && (QRA > QRB)) { + count_B++; + } + + } + + BSA = (float) count_A; + BSB = (float) count_B; + + } + catch(exception& e) { + errorOut(e, "Slayer", "bootstrapSNPS"); + exit(1); + } +} +/***********************************************************************/ +float Slayer::snpQA(vector data) { + try { + + int numIdentical = 0; + + for (int i = 0; i < data.size(); i++) { + if (data[i].parentAChar == data[i].queryChar) { + numIdentical++; + } + } + + float percentID = (numIdentical / data.size()) * 100; + + return percentID; + } + catch(exception& e) { + errorOut(e, "Slayer", "snpQA"); + exit(1); + } +} +/***********************************************************************/ +float Slayer::snpQB(vector data) { + try { + + int numIdentical = 0; + + for (int i = 0; i < data.size(); i++) { + if (data[i].parentBChar == data[i].queryChar) { + numIdentical++; + } + } + + float percentID = (numIdentical / data.size()) * 100; + + return percentID; + + } + catch(exception& e) { + errorOut(e, "Slayer", "snpQB"); + exit(1); + } +} +/***********************************************************************/ +float Slayer::snpAB(vector data) { + try { + int numIdentical = 0; + + for (int i = 0; i < data.size(); i++) { + if (data[i].parentAChar == data[i].parentBChar) { + numIdentical++; + } + } + + float percentID = (numIdentical / data.size()) * 100; + + return percentID; + + } + catch(exception& e) { + errorOut(e, "Slayer", "snpAB"); + exit(1); + } +} +/***********************************************************************/ float Slayer::computePercentID(string queryFrag, string parent, int left, int right) { try { int total = 0; @@ -87,7 +379,7 @@ float Slayer::computePercentID(string queryFrag, string parent, int left, int ri } } /***********************************************************************/ -//this is a vertical filter +//remove columns that contain any gaps void Slayer::verticalFilter(vector seqs) { try { vector gaps; gaps.resize(seqs[0]->getAligned().length(), 0); @@ -101,16 +393,17 @@ void Slayer::verticalFilter(vector seqs) { for (int j = 0; j < seqAligned.length(); j++) { //if this spot is a gap - if ((seqAligned[j] == '-') || (seqAligned[j] == '.')) { gaps[j]++; } + if ((seqAligned[j] == '-') || (seqAligned[j] == '.') || (toupper(seqAligned[j]) == 'N')) { gaps[j]++; } } } //zero out spot where all sequences have blanks int numColRemoved = 0; + for(int i = 0; i < seqs[0]->getAligned().length(); i++){ - if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; } + if(gaps[i] != 0) { filterString[i] = '0'; numColRemoved++; } } - + //for each sequence for (int i = 0; i < seqs.size(); i++) { diff --git a/slayer.h b/slayer.h index e898989..ea79727 100644 --- a/slayer.h +++ b/slayer.h @@ -15,15 +15,49 @@ /***********************************************************************/ //This class was modeled after the chimeraSlayer written by the Broad Institute /***********************************************************************/ -struct data_struct { //not right needs work... - int regionStart; - int regionEnd; - string parent; - float queryToParent; - float queryToParentLocal; - float divR; +struct data_struct { //this is crazy big...but follow original. + float divr_qla_qrb; + float divr_qlb_qra; + float qla_qrb; + float qlb_qra; + float qla; + float qrb; + float ab; + float qa; + float qb; + float lab; + float rab; + float qra; + float qlb; + int winLStart; + int winLEnd; + int winRStart; + int winREnd; + Sequence querySeq; + Sequence parentA; + Sequence parentB; + float bsa; + float bsb; + float bsMax; + float chimeraMax; + }; /***********************************************************************/ +//sorts lowest to highest first by bsMax, then if tie by chimeraMax +inline bool compareDataStruct(data_struct left, data_struct right){ + if (left.bsMax < right.bsMax) { return true; } + else if (left.bsMax == right.bsMax) { + return (left.chimeraMax < right.chimeraMax); + }else { return false; } +} +/***********************************************************************/ +struct snps { + char queryChar; + char parentAChar; + char parentBChar; +}; + +/***********************************************************************/ class Slayer { @@ -33,21 +67,25 @@ class Slayer { Slayer(int, int, int, float); ~Slayer() {}; - void getResults(Sequence*, vector); - //float getPercentID() { return percentIdenticalQueryChimera; } - //vector getOutput() { return outputResults; } + string getResults(Sequence*, vector); + vector getOutput() { return outputResults; } private: int windowSize, windowStep, parentFragmentThreshold; float divRThreshold; + vector outputResults; void verticalFilter(vector); float computePercentID(string, string, int, int); vector runBellerophon(Sequence*, Sequence*, Sequence*); - + vector getSNPS(string, string, string, int, int); + void bootstrapSNPS(vector, vector, float&, float&); + float snpQA(vector); + float snpQB(vector); + float snpAB(vector); };