X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=bellerophon.cpp;h=833cfb907d6d4bd2aed8acb3d8522932ac3b2e7d;hp=5e219d49c9c6c0b41a80614d347fefbbf3470350;hb=d1c97b8c04bb75faca1e76ffad60b37a4d789d3d;hpb=f11da43e273f150a8ece7924c4c7f7b43862b372 diff --git a/bellerophon.cpp b/bellerophon.cpp index 5e219d4..833cfb9 100644 --- a/bellerophon.cpp +++ b/bellerophon.cpp @@ -13,223 +13,494 @@ #include "onegapdist.h" -//*************************************************************************************************************** +/***************************************************************************************************************/ -Bellerophon::Bellerophon(string name) { +Bellerophon::Bellerophon(string name, bool filterSeqs, bool c, int win, int inc, int p, string o) : Chimera() { try { fastafile = name; + correction = c; + outputDir = o; + window = win; + increment = inc; + processors = p; + + //read in sequences + seqs = readSeqs(fastafile); + numSeqs = seqs.size(); + if (numSeqs == 0) { m->mothurOut("Error in reading you sequences."); m->mothurOutEndLine(); exit(1); } + + //do soft filter + if (filterSeqs) { + createFilter(seqs, 0.5); + for (int i = 0; i < seqs.size(); i++) { runFilter(seqs[i]); } + } + + distCalculator = new eachGapDist(); + + //set default window to 25% of sequence length + string seq0 = seqs[0]->getAligned(); + if (window == 0) { window = seq0.length() / 4; } + else if (window > (seq0.length() / 2)) { + m->mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); m->mothurOutEndLine(); + window = (seq0.length() / 2); + } + + if (increment > (seqs[0]->getAlignLength() - (2*window))) { + if (increment != 10) { + + m->mothurOut("You have selected a increment that is too large. I will use the default."); m->mothurOutEndLine(); + increment = 10; + if (increment > (seqs[0]->getAlignLength() - (2*window))) { increment = 0; } + + }else{ increment = 0; } + } + + if (increment == 0) { iters = 1; } + else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); } + + //initialize pref + pref.resize(iters); + for (int i = 0; i < iters; i++) { + Preference temp; + for (int j = 0; j < numSeqs; j++) { + pref[i].push_back(temp); + } + } + } catch(exception& e) { - errorOut(e, "Bellerophon", "Bellerophon"); + m->errorOut(e, "Bellerophon", "Bellerophon"); exit(1); } } //*************************************************************************************************************** -void Bellerophon::print(ostream& out) { +int Bellerophon::print(ostream& out, ostream& outAcc, string s) { try { int above1 = 0; + + //sorted "best" preference scores for all seqs + vector best = getBestPref(); + + if (m->control_pressed) { return numSeqs; } + out << "Name\tScore\tLeft\tRight\t" << endl; //output prefenence structure to .chimeras file - for (int i = 0; i < pref.size(); i++) { - out << pref[i].name << '\t' << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl; + for (int i = 0; i < best.size(); i++) { + + if (m->control_pressed) { return numSeqs; } + + out << best[i].name << '\t' << setprecision(3) << best[i].score << '\t' << best[i].leftParent << '\t' << best[i].rightParent << endl; //calc # of seqs with preference above 1.0 - if (pref[i].score[0] > 1.0) { + if (best[i].score > 1.0) { above1++; - mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine(); - mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine(); + outAcc << best[i].name << endl; + m->mothurOut(best[i].name + " is a suspected chimera at breakpoint " + toString(best[i].midpoint)); m->mothurOutEndLine(); + m->mothurOut("It's score is " + toString(best[i].score) + " with suspected left parent " + best[i].leftParent + " and right parent " + best[i].rightParent); m->mothurOutEndLine(); } } //output results to screen - mothurOutEndLine(); - mothurOut("Sequence with preference score above 1.0: " + toString(above1)); mothurOutEndLine(); + m->mothurOutEndLine(); + m->mothurOut("Sequence with preference score above 1.0: " + toString(above1)); m->mothurOutEndLine(); int spot; - spot = pref.size()-1; - mothurOut("Minimum:\t" + toString(pref[spot].score[0])); mothurOutEndLine(); - spot = pref.size() * 0.975; - mothurOut("2.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine(); - spot = pref.size() * 0.75; - mothurOut("25%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine(); - spot = pref.size() * 0.50; - mothurOut("Median: \t" + toString(pref[spot].score[0])); mothurOutEndLine(); - spot = pref.size() * 0.25; - mothurOut("75%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine(); - spot = pref.size() * 0.025; - mothurOut("97.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine(); + spot = best.size()-1; + m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.975; + m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.75; + m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.50; + m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.25; + m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.025; + m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine(); spot = 0; - mothurOut("Maximum:\t" + toString(pref[spot].score[0])); mothurOutEndLine(); + m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + + return numSeqs; } catch(exception& e) { - errorOut(e, "Bellerophon", "print"); + m->errorOut(e, "Bellerophon", "print"); exit(1); } } +#ifdef USE_MPI +//*************************************************************************************************************** +int Bellerophon::print(MPI_File& out, MPI_File& outAcc, string s) { + try { + + int pid; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + + if (pid == 0) { + string outString = ""; + + //sorted "best" preference scores for all seqs + vector best = getBestPref(); + + int above1 = 0; + int ninetyfive = best.size() * 0.05; + float cutoffScore = best[ninetyfive].score; + + if (m->control_pressed) { return numSeqs; } + + outString = "Name\tScore\tLeft\tRight\n"; + MPI_Status status; + int olength = outString.length(); + char* buf5 = new char[olength]; + memcpy(buf5, outString.c_str(), olength); + + MPI_File_write_shared(out, buf5, olength, MPI_CHAR, &status); + + delete buf5; + //output prefenence structure to .chimeras file + for (int i = 0; i < best.size(); i++) { + + if (m->control_pressed) { return numSeqs; } + + outString = best[i].name + "\t" + toString(best[i].score) + "\t" + best[i].leftParent + "\t" + best[i].rightParent + "\n"; + + MPI_Status status; + int length = outString.length(); + char* buf2 = new char[length]; + memcpy(buf2, outString.c_str(), length); + + MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status); + + delete buf2; + + //calc # of seqs with preference above 95%tile + if (best[i].score >= cutoffScore) { + above1++; + string outAccString = ""; + outAccString += best[i].name + "\n"; + + MPI_Status statusAcc; + length = outAccString.length(); + char* buf = new char[length]; + memcpy(buf, outAccString.c_str(), length); + + MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc); + + delete buf; + + cout << best[i].name << " is a suspected chimera at breakpoint " << toString(best[i].midpoint) << endl; + cout << "It's score is " << toString(best[i].score) << " with suspected left parent " << best[i].leftParent << " and right parent " << best[i].rightParent << endl; + } + } + + //output results to screen + m->mothurOutEndLine(); + m->mothurOut("Sequence with preference score above " + toString(cutoffScore) + ": " + toString(above1)); m->mothurOutEndLine(); + int spot; + spot = best.size()-1; + m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.975; + m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.75; + m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.50; + m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.25; + m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = best.size() * 0.025; + m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + spot = 0; + m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine(); + + } + + return numSeqs; + + } + catch(exception& e) { + m->errorOut(e, "Bellerophon", "print"); + exit(1); + } +} +#endif //******************************************************************************************************************** //sorts highest score to lowest inline bool comparePref(Preference left, Preference right){ - return (left.score[0] > right.score[0]); + return (left.score > right.score); } - //*************************************************************************************************************** -void Bellerophon::getChimeras() { +int Bellerophon::getChimeras() { try { - //do soft filter - if (filter) { - string optionString = "fasta=" + fastafile + ", soft=50"; - filterSeqs = new FilterSeqsCommand(optionString); - filterSeqs->execute(); - delete filterSeqs; - - //reset fastafile to filtered file - fastafile = getRootName(fastafile) + "filter.fasta"; - } - - distCalculator = new eachGapDist(); - - //read in sequences - seqs = readSeqs(fastafile); - - int numSeqs = seqs.size(); + //create breaking points + vector midpoints; midpoints.resize(iters, window); + for (int i = 1; i < iters; i++) { midpoints[i] = midpoints[i-1] + increment; } + + #ifdef USE_MPI + int pid, numSeqsPerProcessor; + + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); - if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); exit(1); } + numSeqsPerProcessor = iters / processors; - //set default window to 25% of sequence length - string seq0 = seqs[0]->getAligned(); - if (window == 0) { window = seq0.length() / 4; } - else if (window > (seq0.length() / 2)) { - mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); mothurOutEndLine(); - window = (seq0.length() / 2); + //each process hits this only once + unsigned long long startPos = pid * numSeqsPerProcessor; + if(pid == processors - 1){ + numSeqsPerProcessor = iters - pid * numSeqsPerProcessor; } + lines.push_back(linePair(startPos, numSeqsPerProcessor)); - if (increment > (seqs[0]->getAlignLength() - (2*window))) { - if (increment != 10) { - - mothurOut("You have selected a increment that is too large. I will use the default."); mothurOutEndLine(); - increment = 10; - if (increment > (seqs[0]->getAlignLength() - (2*window))) { increment = 0; } - - }else{ increment = 0; } - } -cout << "increment = " << increment << endl; - if (increment == 0) { iters = 1; } - else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); } + //fill pref with scores + driverChimeras(midpoints, lines[0]); - //initialize pref - pref.resize(numSeqs); - - for (int i = 0; i < numSeqs; i++ ) { - pref[i].leftParent.resize(2); pref[i].rightParent.resize(2); pref[i].score.resize(2); pref[i].closestLeft.resize(2); pref[i].closestRight.resize(3); - pref[i].name = seqs[i]->getName(); - pref[i].score[0] = 0.0; pref[i].score[1] = 0.0; - pref[i].closestLeft[0] = 100000.0; pref[i].closestLeft[1] = 100000.0; - pref[i].closestRight[0] = 100000.0; pref[i].closestRight[1] = 100000.0; - } - - int midpoint = window; - int count = 0; - while (count < iters) { + if (m->control_pressed) { return 0; } + + //each process must send its parts back to pid 0 + if (pid == 0) { + + //receive results + for (int j = 1; j < processors; j++) { - //create 2 vectors of sequences, 1 for left side and one for right side - vector left; vector right; + vector MPIBestSend; + for (int i = 0; i < numSeqs; i++) { - for (int i = 0; i < seqs.size(); i++) { -//cout << "whole = " << seqs[i].getAligned() << endl; - //save left side - string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window); - Sequence tempLeft; - tempLeft.setName(seqs[i]->getName()); - tempLeft.setAligned(seqLeft); - left.push_back(tempLeft); -//cout << "left = " << tempLeft.getAligned() << endl; - //save right side - string seqRight = seqs[i]->getAligned().substr(midpoint, window); - Sequence tempRight; - tempRight.setName(seqs[i]->getName()); - tempRight.setAligned(seqRight); - right.push_back(tempRight); -//cout << "right = " << seqRight << endl; + if (m->control_pressed) { return 0; } + + MPI_Status status; + //receive string + int length; + MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status); + + char* buf = new char[length]; + MPI_Recv(&buf[0], length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status); + + string temp = buf; + if (temp.length() > length) { temp = temp.substr(0, length); } + delete buf; + + MPIBestSend.push_back(temp); } - //adjust midpoint by increment - midpoint += increment; + fillPref(j, MPIBestSend); + if (m->control_pressed) { return 0; } + } + + }else { + //takes best window for each sequence and turns Preference to string that can be parsed by pid 0. + //played with this a bit, but it may be better to try user-defined datatypes with set string lengths?? + vector MPIBestSend = getBestWindow(lines[0]); + pref.clear(); - //this should be parallelized - //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | ) - //create a matrix containing the distance from left to left and right to right - //calculate distances - SparseMatrix* SparseLeft = new SparseMatrix(); - SparseMatrix* SparseRight = new SparseMatrix(); + //send your result to parent + for (int i = 0; i < numSeqs; i++) { - createSparseMatrix(0, left.size(), SparseLeft, left); - createSparseMatrix(0, right.size(), SparseRight, right); + if (m->control_pressed) { return 0; } - vector distMapRight; - vector distMapLeft; + int bestLength = MPIBestSend[i].length(); + char* buf = new char[bestLength]; + memcpy(buf, MPIBestSend[i].c_str(), bestLength); - // Create a data structure to quickly access the distance information. - //this is from thallingers reimplementation on get.oturep - // It consists of a vector of distance maps, where each map contains - // all distances of a certain sequence. Vector and maps are accessed - // via the index of a sequence in the distance matrix - distMapRight = vector(numSeqs); - distMapLeft = vector(numSeqs); - //cout << "left" << endl << endl; - for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) { - distMapLeft[currentCell->row][currentCell->column] = currentCell->dist; - //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl; - } - //cout << "right" << endl << endl; - for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) { - distMapRight[currentCell->row][currentCell->column] = currentCell->dist; - //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl; - } - - delete SparseLeft; - delete SparseRight; + MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD); + MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD); + delete buf; + } + + MPIBestSend.clear(); + } + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case + #else + + //divide breakpoints between processors + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + if(processors == 1){ + lines.push_back(linePair(0, iters)); + //fill pref with scores + driverChimeras(midpoints, lines[0]); + + }else{ + + int numSeqsPerProcessor = iters / processors; - //fill preference structure - generatePreferences(distMapLeft, distMapRight, midpoint); + for (int i = 0; i < processors; i++) { + unsigned long long startPos = i * numSeqsPerProcessor; + if(i == processors - 1){ + numSeqsPerProcessor = iters - i * numSeqsPerProcessor; + } + lines.push_back(linePair(startPos, numSeqsPerProcessor)); + } - count++; + createProcesses(midpoints); + } + #else + lines.push_back(linePair(0, iters)); + + ///fill pref with scores + driverChimeras(midpoints, lines[0]); + #endif + + #endif + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "Bellerophon", "getChimeras"); + exit(1); + } +} +/**************************************************************************************************/ + +int Bellerophon::createProcesses(vector mid) { + try { +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + int process = 0; + int exitCommand = 1; + vector processIDS; + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + exitCommand = driverChimeras(mid, lines[process]); + string tempOut = outputDir + toString(getpid()) + ".temp"; + writePrefs(tempOut, lines[process]); + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } } - delete distCalculator; + //force parent to wait until all the processes are done + for (int i=0;ierrorOut(e, "AlignCommand", "createProcesses"); + exit(1); + } +} +//*************************************************************************************************************** +int Bellerophon::driverChimeras(vector midpoints, linePair line) { + try { + + for (int h = line.start; h < (line.start + line.num); h++) { + count = h; + int midpoint = midpoints[h]; + + //initialize pref[count] + for (int i = 0; i < numSeqs; i++ ) { + pref[count][i].name = seqs[i]->getName(); + pref[count][i].midpoint = midpoint; + } - //how much higher or lower is this than expected - pref[i].score[0] = pref[i].score[0] / expectedPercent; + if (m->control_pressed) { return 0; } + + //create 2 vectors of sequences, 1 for left side and one for right side + vector left; vector right; + + for (int i = 0; i < seqs.size(); i++) { + + if (m->control_pressed) { return 0; } + + //cout << "midpoint = " << midpoint << "\twindow = " << window << endl; + //cout << "whole = " << seqs[i]->getAligned().length() << endl; + //save left side + string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window); + Sequence tempLeft; + tempLeft.setName(seqs[i]->getName()); + tempLeft.setAligned(seqLeft); + left.push_back(tempLeft); + //cout << "left = " << tempLeft.getAligned().length() << endl; + //save right side + string seqRight = seqs[i]->getAligned().substr(midpoint, window); + Sequence tempRight; + tempRight.setName(seqs[i]->getName()); + tempRight.setAligned(seqRight); + right.push_back(tempRight); + //cout << "right = " << seqRight.length() << endl; + } + + //this should be parallelized + //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | ) + //create a matrix containing the distance from left to left and right to right + //calculate distances + SparseMatrix* SparseLeft = new SparseMatrix(); + SparseMatrix* SparseRight = new SparseMatrix(); + + createSparseMatrix(0, left.size(), SparseLeft, left); + + if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; } + + createSparseMatrix(0, right.size(), SparseRight, right); + + if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; } + + left.clear(); right.clear(); + vector distMapRight; + vector distMapLeft; + + // Create a data structure to quickly access the distance information. + //this is from thallingers reimplementation on get.oturep + // It consists of a vector of distance maps, where each map contains + // all distances of a certain sequence. Vector and maps are accessed + // via the index of a sequence in the distance matrix + distMapRight = vector(numSeqs); + distMapLeft = vector(numSeqs); + //cout << "left" << endl << endl; + for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) { + distMapLeft[currentCell->row][currentCell->column] = currentCell->dist; + if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; } + //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl; + } + //cout << "right" << endl << endl; + for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) { + distMapRight[currentCell->row][currentCell->column] = currentCell->dist; + if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; } + //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl; + } + + delete SparseLeft; + delete SparseRight; + + //fill preference structure + generatePreferences(distMapLeft, distMapRight, midpoint); + if (m->control_pressed) { return 0; } + //report progress + if((h+1) % 10 == 0){ cout << "Processing sliding window: " << toString(h+1) << "\n"; m->mothurOutJustToLog("Processing sliding window: " + toString(h+1) + "\n") ; } } - - //sort Preferences highest to lowest - sort(pref.begin(), pref.end(), comparePref); - - + //report progress + if((line.start + line.num) % 10 != 0){ cout << "Processing sliding window: " << toString(line.start + line.num) << "\n"; m->mothurOutJustToLog("Processing sliding window: " + toString(line.start + line.num) + "\n") ; } + return 0; + } catch(exception& e) { - errorOut(e, "Bellerophon", "getChimeras"); + m->errorOut(e, "Bellerophon", "driverChimeras"); exit(1); } } @@ -241,6 +512,8 @@ int Bellerophon::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* spar for(int i=startSeq; icontrol_pressed) { return 0; } distCalculator->calcDist(s[i], s[j]); float dist = distCalculator->getDist(); @@ -254,27 +527,17 @@ int Bellerophon::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* spar return 1; } catch(exception& e) { - errorOut(e, "Bellerophon", "createSparseMatrix"); + m->errorOut(e, "Bellerophon", "createSparseMatrix"); exit(1); } } /***************************************************************************************************************/ -void Bellerophon::generatePreferences(vector left, vector right, int mid){ +int Bellerophon::generatePreferences(vector left, vector right, int mid){ try { - float dme = 0.0; SeqMap::iterator itR; SeqMap::iterator itL; - //initialize pref[i] - for (int i = 0; i < pref.size(); i++) { - pref[i].score[1] = 0.0; - pref[i].closestLeft[1] = 100000.0; - pref[i].closestRight[1] = 100000.0; - pref[i].leftParent[1] = ""; - pref[i].rightParent[1] = ""; - } - for (int i = 0; i < left.size(); i++) { SeqMap currentLeft = left[i]; //example i = 3; currentLeft is a map of 0 to the distance of sequence 3 to sequence 0, @@ -283,6 +546,8 @@ void Bellerophon::generatePreferences(vector left, vector right, SeqMap currentRight = right[i]; // same as left but with distances on the right side. for (int j = 0; j < i; j++) { + + if (m->control_pressed) { return 0; } itL = currentLeft.find(j); itR = currentRight.find(j); @@ -293,15 +558,15 @@ void Bellerophon::generatePreferences(vector left, vector right, if ((itL != currentLeft.end()) && (itR != currentRight.end())) { if (!correction) { - pref[i].score[1] += abs((itL->second - itR->second)); - pref[j].score[1] += abs((itL->second - itR->second)); + pref[count][i].score += abs((itL->second - itR->second)); + pref[count][j].score += abs((itL->second - itR->second)); //cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl; //cout << "abs = " << abs((itL->second - itR->second)) << endl; //cout << i << " score = " << pref[i].score[1] << endl; //cout << j << " score = " << pref[j].score[1] << endl; }else { - pref[i].score[1] += abs((sqrt(itL->second) - sqrt(itR->second))); - pref[j].score[1] += abs((sqrt(itL->second) - sqrt(itR->second))); + pref[count][i].score += abs((sqrt(itL->second) - sqrt(itR->second))); + pref[count][j].score += abs((sqrt(itL->second) - sqrt(itR->second))); //cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl; //cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl; //cout << i << " score = " << pref[i].score[1] << endl; @@ -309,27 +574,27 @@ void Bellerophon::generatePreferences(vector left, vector right, } //cout << "pref[" << i << "].closestLeft[1] = " << pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl; //are you the closest left sequence - if (itL->second < pref[i].closestLeft[1]) { + if (itL->second < pref[count][i].closestLeft) { - pref[i].closestLeft[1] = itL->second; - pref[i].leftParent[1] = seqs[j]->getName(); + pref[count][i].closestLeft = itL->second; + pref[count][i].leftParent = seqs[j]->getName(); //cout << "updating closest left to " << pref[i].leftParent[1] << endl; } //cout << "pref[" << j << "].closestLeft[1] = " << pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl; - if (itL->second < pref[j].closestLeft[1]) { - pref[j].closestLeft[1] = itL->second; - pref[j].leftParent[1] = seqs[i]->getName(); + if (itL->second < pref[count][j].closestLeft) { + pref[count][j].closestLeft = itL->second; + pref[count][j].leftParent = seqs[i]->getName(); //cout << "updating closest left to " << pref[j].leftParent[1] << endl; } //are you the closest right sequence - if (itR->second < pref[i].closestRight[1]) { - pref[i].closestRight[1] = itR->second; - pref[i].rightParent[1] = seqs[j]->getName(); + if (itR->second < pref[count][i].closestRight) { + pref[count][i].closestRight = itR->second; + pref[count][i].rightParent = seqs[j]->getName(); } - if (itR->second < pref[j].closestRight[1]) { - pref[j].closestRight[1] = itR->second; - pref[j].rightParent[1] = seqs[i]->getName(); + if (itR->second < pref[count][j].closestRight) { + pref[count][j].closestRight = itR->second; + pref[count][j].rightParent = seqs[i]->getName(); } } @@ -337,50 +602,190 @@ void Bellerophon::generatePreferences(vector left, vector right, } + + return 1; + + } + catch(exception& e) { + m->errorOut(e, "Bellerophon", "generatePreferences"); + exit(1); + } +} +/**************************************************************************************************/ +vector Bellerophon::getBestPref() { + try { + + vector best; - - //calculate the dme + //for each sequence + for (int i = 0; i < numSeqs; i++) { + + //set best pref score to first one + Preference temp = pref[0][i]; + + if (m->control_pressed) { return best; } + + //for each window + for (int j = 1; j < pref.size(); j++) { + + //is this a better score + if (pref[j][i].score > temp.score) { temp = pref[j][i]; } + } + + best.push_back(temp); + } - int count0 = 0; - for (int i = 0; i < pref.size(); i++) { dme += pref[i].score[1]; if (pref[i].score[1] == 0.0) { count0++; } } + //rank preference score to eachother + float dme = 0.0; + float expectedPercent = 1 / (float) (best.size()); - float expectedPercent = 1 / (float) (pref.size() - count0); -//cout << endl << "dme = " << dme << endl; - //recalculate prefernences based on dme - for (int i = 0; i < pref.size(); i++) { -//cout << "unadjusted pref " << i << " = " << pref[i].score[1] << endl; - // gives the actual percentage of the dme this seq adds - pref[i].score[1] = pref[i].score[1] / dme; + for (int i = 0; i < best.size(); i++) { dme += best[i].score; } + + for (int i = 0; i < best.size(); i++) { + + if (m->control_pressed) { return best; } + + //gives the actual percentage of the dme this seq adds + best[i].score = best[i].score / dme; //how much higher or lower is this than expected - pref[i].score[1] = pref[i].score[1] / expectedPercent; + best[i].score = best[i].score / expectedPercent; + + } + + //sort Preferences highest to lowest + sort(best.begin(), best.end(), comparePref); + + return best; + } + catch(exception& e) { + m->errorOut(e, "Bellerophon", "getBestPref"); + exit(1); + } +} +/**************************************************************************************************/ +int Bellerophon::writePrefs(string file, linePair tempLine) { + try { + + ofstream outTemp; + m->openOutputFile(file, outTemp); + + //lets you know what part of the pref matrix you are writing + outTemp << tempLine.start << '\t' << tempLine.num << endl; + + for (int i = tempLine.start; i < (tempLine.start + tempLine.num); i++) { + + for (int j = 0; j < numSeqs; j++) { + + if (m->control_pressed) { outTemp.close(); m->mothurRemove(file); return 0; } + + outTemp << pref[i][j].name << '\t' << pref[i][j].leftParent << '\t' << pref[i][j].rightParent << '\t'; + outTemp << pref[i][j].score << '\t' << pref[i][j].closestLeft << '\t' << pref[i][j].closestRight << '\t' << pref[i][j].midpoint << endl; + } + } + + outTemp.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Bellerophon", "writePrefs"); + exit(1); + } +} +/**************************************************************************************************/ +int Bellerophon::readPrefs(string file) { + try { + + ifstream inTemp; + m->openInputFile(file, inTemp); + + int start, num; + + //lets you know what part of the pref matrix you are writing + inTemp >> start >> num; m->gobble(inTemp); + + for (int i = start; i < num; i++) { - //pref[i].score[1] = dme / (dme - 2 * pref[i].score[1]); + for (int j = 0; j < numSeqs; j++) { + + if (m->control_pressed) { inTemp.close(); m->mothurRemove(file); return 0; } - //so a non chimeric sequence would be around 1, and a chimeric would be signifigantly higher. -//cout << "adjusted pref " << i << " = " << pref[i].score[1] << endl; + inTemp >> pref[i][j].name >> pref[i][j].leftParent >> pref[i][j].rightParent; + inTemp >> pref[i][j].score >> pref[i][j].closestLeft >> pref[i][j].closestRight >> pref[i][j].midpoint; + m->gobble(inTemp); + } } - //is this score bigger then the last score - for (int i = 0; i < pref.size(); i++) { - - //update biggest score - if (pref[i].score[1] > pref[i].score[0]) { - pref[i].score[0] = pref[i].score[1]; - pref[i].leftParent[0] = pref[i].leftParent[1]; - pref[i].rightParent[0] = pref[i].rightParent[1]; - pref[i].closestLeft[0] = pref[i].closestLeft[1]; - pref[i].closestRight[0] = pref[i].closestRight[1]; - pref[i].midpoint = mid; + inTemp.close(); + + m->mothurRemove(file); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "Bellerophon", "writePrefs"); + exit(1); + } +} +/**************************************************************************************************/ +vector Bellerophon::getBestWindow(linePair line) { + try { + + vector best; + + //for each sequence + for (int i = 0; i < numSeqs; i++) { + + //set best pref score to first one + Preference temp = pref[line.start][i]; + + if (m->control_pressed) { return best; } + + //for each window + for (int j = (line.start+1); j < (line.start+line.num); j++) { + + //is this a better score + if (pref[j][i].score > temp.score) { temp = pref[j][i]; } } + string tempString = temp.name + '\t' + temp.leftParent + '\t' + temp.rightParent + '\t' + toString(temp.score); + best.push_back(tempString); + } + + return best; + + } + catch(exception& e) { + m->errorOut(e, "Bellerophon", "getBestWindow"); + exit(1); + } +} +/**************************************************************************************************/ +int Bellerophon::fillPref(int process, vector& best) { + try { + //figure out where you start so you can put the best scores there + int numSeqsPerProcessor = iters / processors; + int start = process * numSeqsPerProcessor; + + for (int i = 0; i < best.size(); i++) { + + if (m->control_pressed) { return 0; } + + istringstream iss (best[i],istringstream::in); + + string tempScore; + iss >> pref[start][i].name >> pref[start][i].leftParent >> pref[start][i].rightParent >> tempScore; + convert(tempScore, pref[start][i].score); } + return 0; } catch(exception& e) { - errorOut(e, "Bellerophon", "generatePreferences"); + m->errorOut(e, "Bellerophon", "fillPref"); exit(1); } } + /**************************************************************************************************/