#include "onegapdist.h"
-//***************************************************************************************************************
+/***************************************************************************************************************/
-Bellerophon::Bellerophon(string name, string o) {
+Bellerophon::Bellerophon(string name, bool filterSeqs, bool c, int win, int inc, int p, string o) : Chimera() {
try {
fastafile = name;
+ correction = c;
outputDir = o;
+ window = win;
+ increment = inc;
+ processors = p;
+
+ //read in sequences
+ seqs = readSeqs(fastafile);
+ numSeqs = seqs.size();
+ if (numSeqs == 0) { m->mothurOut("Error in reading you sequences."); m->mothurOutEndLine(); exit(1); }
+
+ //do soft filter
+ if (filterSeqs) {
+ createFilter(seqs, 0.5);
+ for (int i = 0; i < seqs.size(); i++) { runFilter(seqs[i]); }
+ }
+
+ distCalculator = new eachGapDist();
+
+ //set default window to 25% of sequence length
+ string seq0 = seqs[0]->getAligned();
+ if (window == 0) { window = seq0.length() / 4; }
+ else if (window > (seq0.length() / 2)) {
+ m->mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); m->mothurOutEndLine();
+ window = (seq0.length() / 2);
+ }
+
+ if (increment > (seqs[0]->getAlignLength() - (2*window))) {
+ if (increment != 10) {
+
+ m->mothurOut("You have selected a increment that is too large. I will use the default."); m->mothurOutEndLine();
+ increment = 10;
+ if (increment > (seqs[0]->getAlignLength() - (2*window))) { increment = 0; }
+
+ }else{ increment = 0; }
+ }
+
+ if (increment == 0) { iters = 1; }
+ else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); }
+
+ //initialize pref
+ pref.resize(iters);
+ for (int i = 0; i < iters; i++) {
+ Preference temp;
+ for (int j = 0; j < numSeqs; j++) {
+ pref[i].push_back(temp);
+ }
+ }
+
}
catch(exception& e) {
- errorOut(e, "Bellerophon", "Bellerophon");
+ m->errorOut(e, "Bellerophon", "Bellerophon");
exit(1);
}
}
//***************************************************************************************************************
-void Bellerophon::print(ostream& out) {
+int Bellerophon::print(ostream& out, ostream& outAcc) {
try {
int above1 = 0;
+
+ //sorted "best" preference scores for all seqs
+ vector<Preference> best = getBestPref();
+
+ if (m->control_pressed) { return numSeqs; }
+
out << "Name\tScore\tLeft\tRight\t" << endl;
//output prefenence structure to .chimeras file
- for (int i = 0; i < pref.size(); i++) {
- out << pref[i].name << '\t' << setprecision(3) << pref[i].score[0] << '\t' << pref[i].leftParent[0] << '\t' << pref[i].rightParent[0] << endl;
+ for (int i = 0; i < best.size(); i++) {
+
+ if (m->control_pressed) { return numSeqs; }
+
+ out << best[i].name << '\t' << setprecision(3) << best[i].score << '\t' << best[i].leftParent << '\t' << best[i].rightParent << endl;
//calc # of seqs with preference above 1.0
- if (pref[i].score[0] > 1.0) {
+ if (best[i].score > 1.0) {
above1++;
- mothurOut(pref[i].name + " is a suspected chimera at breakpoint " + toString(pref[i].midpoint)); mothurOutEndLine();
- mothurOut("It's score is " + toString(pref[i].score[0]) + " with suspected left parent " + pref[i].leftParent[0] + " and right parent " + pref[i].rightParent[0]); mothurOutEndLine();
+ outAcc << best[i].name << endl;
+ m->mothurOut(best[i].name + " is a suspected chimera at breakpoint " + toString(best[i].midpoint)); m->mothurOutEndLine();
+ m->mothurOut("It's score is " + toString(best[i].score) + " with suspected left parent " + best[i].leftParent + " and right parent " + best[i].rightParent); m->mothurOutEndLine();
}
}
//output results to screen
- mothurOutEndLine();
- mothurOut("Sequence with preference score above 1.0: " + toString(above1)); mothurOutEndLine();
+ m->mothurOutEndLine();
+ m->mothurOut("Sequence with preference score above 1.0: " + toString(above1)); m->mothurOutEndLine();
int spot;
- spot = pref.size()-1;
- mothurOut("Minimum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.975;
- mothurOut("2.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.75;
- mothurOut("25%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.50;
- mothurOut("Median: \t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.25;
- mothurOut("75%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
- spot = pref.size() * 0.025;
- mothurOut("97.5%-tile:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+ spot = best.size()-1;
+ m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.975;
+ m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.75;
+ m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.50;
+ m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.25;
+ m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.025;
+ m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
spot = 0;
- mothurOut("Maximum:\t" + toString(pref[spot].score[0])); mothurOutEndLine();
+ m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+
+ return numSeqs;
}
catch(exception& e) {
- errorOut(e, "Bellerophon", "print");
+ m->errorOut(e, "Bellerophon", "print");
exit(1);
}
}
-
-//********************************************************************************************************************
-//sorts highest score to lowest
-inline bool comparePref(Preference left, Preference right){
- return (left.score[0] > right.score[0]);
-}
-
+#ifdef USE_MPI
//***************************************************************************************************************
-int Bellerophon::getChimeras() {
+int Bellerophon::print(MPI_File& out, MPI_File& outAcc) {
try {
+
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- //do soft filter
- if (filter) {
- string optionString = "fasta=" + fastafile + ", soft=50";
- if (outputDir != "") { optionString += ", outputdir=" + outputDir; }
+ if (pid == 0) {
+ string outString = "";
+
+ //sorted "best" preference scores for all seqs
+ vector<Preference> best = getBestPref();
+
+ int above1 = 0;
+ int ninetyfive = best.size() * 0.05;
+ float cutoffScore = best[ninetyfive].score;
+
+ if (m->control_pressed) { return numSeqs; }
+
+ outString = "Name\tScore\tLeft\tRight\n";
+ MPI_Status status;
+ int olength = outString.length();
+ char* buf5 = new char[olength];
+ memcpy(buf5, outString.c_str(), olength);
+
+ MPI_File_write_shared(out, buf5, olength, MPI_CHAR, &status);
- filterSeqs = new FilterSeqsCommand(optionString);
- filterSeqs->execute();
- delete filterSeqs;
+ delete buf5;
+
+ //output prefenence structure to .chimeras file
+ for (int i = 0; i < best.size(); i++) {
+
+ if (m->control_pressed) { return numSeqs; }
+
+ outString = best[i].name + "\t" + toString(best[i].score) + "\t" + best[i].leftParent + "\t" + best[i].rightParent + "\n";
- //reset fastafile to filtered file
- if (outputDir == "") { fastafile = getRootName(fastafile) + "filter.fasta"; }
- else { fastafile = outputDir + getRootName(getSimpleName(fastafile)) + "filter.fasta"; }
+ MPI_Status status;
+ int length = outString.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outString.c_str(), length);
+
+ MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
+
+ delete buf2;
+
+ //calc # of seqs with preference above 95%tile
+ if (best[i].score >= cutoffScore) {
+ above1++;
+ string outAccString = "";
+ outAccString += best[i].name + "\n";
+
+ MPI_Status statusAcc;
+ length = outAccString.length();
+ char* buf = new char[length];
+ memcpy(buf, outAccString.c_str(), length);
+
+ MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
+
+ delete buf;
+
+ cout << best[i].name << " is a suspected chimera at breakpoint " << toString(best[i].midpoint) << endl;
+ cout << "It's score is " << toString(best[i].score) << " with suspected left parent " << best[i].leftParent << " and right parent " << best[i].rightParent << endl;
+ }
+ }
+
+ //output results to screen
+ m->mothurOutEndLine();
+ m->mothurOut("Sequence with preference score above " + toString(cutoffScore) + ": " + toString(above1)); m->mothurOutEndLine();
+ int spot;
+ spot = best.size()-1;
+ m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.975;
+ m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.75;
+ m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.50;
+ m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.25;
+ m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = best.size() * 0.025;
+ m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
+ spot = 0;
+ m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
}
- distCalculator = new eachGapDist();
-
- //read in sequences
- seqs = readSeqs(fastafile);
-
- if (unaligned) { mothurOut("Your sequences need to be aligned when you use the bellerophon method."); mothurOutEndLine(); return 1; }
+ return numSeqs;
- int numSeqs = seqs.size();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bellerophon", "print");
+ exit(1);
+ }
+}
+#endif
+//********************************************************************************************************************
+//sorts highest score to lowest
+inline bool comparePref(Preference left, Preference right){
+ return (left.score > right.score);
+}
+//***************************************************************************************************************
+int Bellerophon::getChimeras() {
+ try {
- if (numSeqs == 0) { mothurOut("Error in reading you sequences."); mothurOutEndLine(); exit(1); }
+ //create breaking points
+ vector<int> midpoints; midpoints.resize(iters, window);
+ for (int i = 1; i < iters; i++) { midpoints[i] = midpoints[i-1] + increment; }
+
+ #ifdef USE_MPI
+ int pid, numSeqsPerProcessor;
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
- //set default window to 25% of sequence length
- string seq0 = seqs[0]->getAligned();
- if (window == 0) { window = seq0.length() / 4; }
- else if (window > (seq0.length() / 2)) {
- mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); mothurOutEndLine();
- window = (seq0.length() / 2);
- }
+ numSeqsPerProcessor = iters / processors;
- if (increment > (seqs[0]->getAlignLength() - (2*window))) {
- if (increment != 10) {
-
- mothurOut("You have selected a increment that is too large. I will use the default."); mothurOutEndLine();
- increment = 10;
- if (increment > (seqs[0]->getAlignLength() - (2*window))) { increment = 0; }
-
- }else{ increment = 0; }
+ //each process hits this only once
+ unsigned long int startPos = pid * numSeqsPerProcessor;
+ if(pid == processors - 1){
+ numSeqsPerProcessor = iters - pid * numSeqsPerProcessor;
}
+ lines.push_back(linePair(startPos, numSeqsPerProcessor));
- if (increment == 0) { iters = 1; }
- else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); }
+ //fill pref with scores
+ driverChimeras(midpoints, lines[0]);
- //initialize pref
- pref.resize(numSeqs);
-
- for (int i = 0; i < numSeqs; i++ ) {
- pref[i].leftParent.resize(2); pref[i].rightParent.resize(2); pref[i].score.resize(2); pref[i].closestLeft.resize(2); pref[i].closestRight.resize(3);
- pref[i].name = seqs[i]->getName();
- pref[i].score[0] = 0.0; pref[i].score[1] = 0.0;
- pref[i].closestLeft[0] = 100000.0; pref[i].closestLeft[1] = 100000.0;
- pref[i].closestRight[0] = 100000.0; pref[i].closestRight[1] = 100000.0;
- }
-
- int midpoint = window;
- int count = 0;
- while (count < iters) {
+ if (m->control_pressed) { return 0; }
- //create 2 vectors of sequences, 1 for left side and one for right side
- vector<Sequence> left; vector<Sequence> right;
+ //each process must send its parts back to pid 0
+ if (pid == 0) {
+
+ //receive results
+ for (int j = 1; j < processors; j++) {
- for (int i = 0; i < seqs.size(); i++) {
-//cout << "midpoint = " << midpoint << "\twindow = " << window << endl;
-//cout << "whole = " << seqs[i]->getAligned().length() << endl;
- //save left side
- string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window);
- Sequence tempLeft;
- tempLeft.setName(seqs[i]->getName());
- tempLeft.setAligned(seqLeft);
- left.push_back(tempLeft);
-//cout << "left = " << tempLeft.getAligned().length() << endl;
- //save right side
- string seqRight = seqs[i]->getAligned().substr(midpoint, window);
- Sequence tempRight;
- tempRight.setName(seqs[i]->getName());
- tempRight.setAligned(seqRight);
- right.push_back(tempRight);
-//cout << "right = " << seqRight.length() << endl;
- }
+ vector<string> MPIBestSend;
+ for (int i = 0; i < numSeqs; i++) {
- //adjust midpoint by increment
- midpoint += increment;
+ if (m->control_pressed) { return 0; }
+
+ MPI_Status status;
+ //receive string
+ int length;
+ MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status);
+
+ char* buf = new char[length];
+ MPI_Recv(&buf, length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status);
+
+ string temp = buf;
+ if (temp.length() > length) { temp = temp.substr(0, length); }
+ delete buf;
+
+ MPIBestSend.push_back(temp);
+ }
+ fillPref(j, MPIBestSend);
- //this should be parallelized
- //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
- //create a matrix containing the distance from left to left and right to right
- //calculate distances
- SparseMatrix* SparseLeft = new SparseMatrix();
- SparseMatrix* SparseRight = new SparseMatrix();
+ if (m->control_pressed) { return 0; }
+ }
+
+ }else {
+ //takes best window for each sequence and turns Preference to string that can be parsed by pid 0.
+ //played with this a bit, but it may be better to try user-defined datatypes with set string lengths??
+ vector<string> MPIBestSend = getBestWindow(lines[0]);
+ pref.clear();
+
+ //send your result to parent
+ for (int i = 0; i < numSeqs; i++) {
- createSparseMatrix(0, left.size(), SparseLeft, left);
- createSparseMatrix(0, right.size(), SparseRight, right);
+ if (m->control_pressed) { return 0; }
- vector<SeqMap> distMapRight;
- vector<SeqMap> distMapLeft;
+ int bestLength = MPIBestSend[i].length();
+ char* buf = new char[bestLength];
+ memcpy(buf, MPIBestSend[i].c_str(), bestLength);
- // Create a data structure to quickly access the distance information.
- //this is from thallingers reimplementation on get.oturep
- // It consists of a vector of distance maps, where each map contains
- // all distances of a certain sequence. Vector and maps are accessed
- // via the index of a sequence in the distance matrix
- distMapRight = vector<SeqMap>(numSeqs);
- distMapLeft = vector<SeqMap>(numSeqs);
- //cout << "left" << endl << endl;
- for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
- distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
- //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
- }
- //cout << "right" << endl << endl;
- for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
- distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
- //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
- }
+ MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD);
+ MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD);
+ delete buf;
+ }
+
+ MPIBestSend.clear();
+ }
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+ #else
+
+ //divide breakpoints between processors
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ if(processors == 1){
+ lines.push_back(linePair(0, iters));
- delete SparseLeft;
- delete SparseRight;
+ //fill pref with scores
+ driverChimeras(midpoints, lines[0]);
+
+ }else{
+
+ int numSeqsPerProcessor = iters / processors;
- //fill preference structure
- generatePreferences(distMapLeft, distMapRight, midpoint);
+ for (int i = 0; i < processors; i++) {
+ unsigned long int startPos = i * numSeqsPerProcessor;
+ if(i == processors - 1){
+ numSeqsPerProcessor = iters - i * numSeqsPerProcessor;
+ }
+ lines.push_back(linePair(startPos, numSeqsPerProcessor));
+ }
- count++;
+ createProcesses(midpoints);
+ }
+ #else
+ lines.push_back(linePair(0, iters));
+
+ ///fill pref with scores
+ driverChimeras(midpoints, lines[0]);
+ #endif
+
+ #endif
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bellerophon", "getChimeras");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+int Bellerophon::createProcesses(vector<int> mid) {
+ try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ int process = 0;
+ int exitCommand = 1;
+ vector<int> processIDS;
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ exitCommand = driverChimeras(mid, lines[process]);
+ string tempOut = outputDir + toString(getpid()) + ".temp";
+ writePrefs(tempOut, lines[process]);
+ exit(0);
+ }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
- delete distCalculator;
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processors;i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
- //rank preference score to eachother
- float dme = 0.0;
- float expectedPercent = 1 / (float) (pref.size());
+ //get info that other processes created
+ for (int i = 0; i < processIDS.size(); i++) {
+ string tempIn = outputDir + toString(processIDS[i]) + ".temp";
+ readPrefs(tempIn);
+ }
- for (int i = 0; i < pref.size(); i++) { dme += pref[i].score[0]; }
-
- for (int i = 0; i < pref.size(); i++) {
-
- //gives the actual percentage of the dme this seq adds
- pref[i].score[0] = pref[i].score[0] / dme;
-
- //how much higher or lower is this than expected
- pref[i].score[0] = pref[i].score[0] / expectedPercent;
+ return exitCommand;
+#endif
+ }
+ catch(exception& e) {
+ m->errorOut(e, "AlignCommand", "createProcesses");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
+int Bellerophon::driverChimeras(vector<int> midpoints, linePair line) {
+ try {
- }
+ for (int h = line.start; h < (line.start + line.num); h++) {
+ count = h;
+ int midpoint = midpoints[h];
- //sort Preferences highest to lowest
- sort(pref.begin(), pref.end(), comparePref);
+ //initialize pref[count]
+ for (int i = 0; i < numSeqs; i++ ) {
+ pref[count][i].name = seqs[i]->getName();
+ pref[count][i].midpoint = midpoint;
+ }
+
+ if (m->control_pressed) { return 0; }
+
+ //create 2 vectors of sequences, 1 for left side and one for right side
+ vector<Sequence> left; vector<Sequence> right;
+
+ for (int i = 0; i < seqs.size(); i++) {
+
+ if (m->control_pressed) { return 0; }
+
+ //cout << "midpoint = " << midpoint << "\twindow = " << window << endl;
+ //cout << "whole = " << seqs[i]->getAligned().length() << endl;
+ //save left side
+ string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window);
+ Sequence tempLeft;
+ tempLeft.setName(seqs[i]->getName());
+ tempLeft.setAligned(seqLeft);
+ left.push_back(tempLeft);
+ //cout << "left = " << tempLeft.getAligned().length() << endl;
+ //save right side
+ string seqRight = seqs[i]->getAligned().substr(midpoint, window);
+ Sequence tempRight;
+ tempRight.setName(seqs[i]->getName());
+ tempRight.setAligned(seqRight);
+ right.push_back(tempRight);
+ //cout << "right = " << seqRight.length() << endl;
+ }
+
+ //this should be parallelized
+ //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
+ //create a matrix containing the distance from left to left and right to right
+ //calculate distances
+ SparseMatrix* SparseLeft = new SparseMatrix();
+ SparseMatrix* SparseRight = new SparseMatrix();
+
+ createSparseMatrix(0, left.size(), SparseLeft, left);
+
+ if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+
+ createSparseMatrix(0, right.size(), SparseRight, right);
+
+ if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+
+ left.clear(); right.clear();
+ vector<SeqMap> distMapRight;
+ vector<SeqMap> distMapLeft;
+
+ // Create a data structure to quickly access the distance information.
+ //this is from thallingers reimplementation on get.oturep
+ // It consists of a vector of distance maps, where each map contains
+ // all distances of a certain sequence. Vector and maps are accessed
+ // via the index of a sequence in the distance matrix
+ distMapRight = vector<SeqMap>(numSeqs);
+ distMapLeft = vector<SeqMap>(numSeqs);
+ //cout << "left" << endl << endl;
+ for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
+ distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
+ if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+ //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+ }
+ //cout << "right" << endl << endl;
+ for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
+ distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
+ if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
+ //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
+ }
+
+ delete SparseLeft;
+ delete SparseRight;
+
+ //fill preference structure
+ generatePreferences(distMapLeft, distMapRight, midpoint);
+
+ if (m->control_pressed) { return 0; }
+
+ //report progress
+ if((h+1) % 10 == 0){ cout << "Processing sliding window: " << toString(h+1) << "\n"; m->mothurOutJustToLog("Processing sliding window: " + toString(h+1) + "\n") ; }
+
+ }
+ //report progress
+ if((line.start + line.num) % 10 != 0){ cout << "Processing sliding window: " << toString(line.start + line.num) << "\n"; m->mothurOutJustToLog("Processing sliding window: " + toString(line.start + line.num) + "\n") ; }
+
return 0;
}
catch(exception& e) {
- errorOut(e, "Bellerophon", "getChimeras");
+ m->errorOut(e, "Bellerophon", "driverChimeras");
exit(1);
}
}
for(int i=startSeq; i<endSeq; i++){
for(int j=0;j<i;j++){
+
+ if (m->control_pressed) { return 0; }
distCalculator->calcDist(s[i], s[j]);
float dist = distCalculator->getDist();
return 1;
}
catch(exception& e) {
- errorOut(e, "Bellerophon", "createSparseMatrix");
+ m->errorOut(e, "Bellerophon", "createSparseMatrix");
exit(1);
}
}
/***************************************************************************************************************/
-void Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
+int Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
try {
float dme = 0.0;
SeqMap::iterator itR;
SeqMap::iterator itL;
- //initialize pref[i]
- for (int i = 0; i < pref.size(); i++) {
- pref[i].score[1] = 0.0;
- pref[i].closestLeft[1] = 100000.0;
- pref[i].closestRight[1] = 100000.0;
- pref[i].leftParent[1] = "";
- pref[i].rightParent[1] = "";
- }
-
for (int i = 0; i < left.size(); i++) {
SeqMap currentLeft = left[i]; //example i = 3; currentLeft is a map of 0 to the distance of sequence 3 to sequence 0,
SeqMap currentRight = right[i]; // same as left but with distances on the right side.
for (int j = 0; j < i; j++) {
+
+ if (m->control_pressed) { return 0; }
itL = currentLeft.find(j);
itR = currentRight.find(j);
if ((itL != currentLeft.end()) && (itR != currentRight.end())) {
if (!correction) {
- pref[i].score[1] += abs((itL->second - itR->second));
- pref[j].score[1] += abs((itL->second - itR->second));
+ pref[count][i].score += abs((itL->second - itR->second));
+ pref[count][j].score += abs((itL->second - itR->second));
//cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
//cout << "abs = " << abs((itL->second - itR->second)) << endl;
//cout << i << " score = " << pref[i].score[1] << endl;
//cout << j << " score = " << pref[j].score[1] << endl;
}else {
- pref[i].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
- pref[j].score[1] += abs((sqrt(itL->second) - sqrt(itR->second)));
+ pref[count][i].score += abs((sqrt(itL->second) - sqrt(itR->second)));
+ pref[count][j].score += abs((sqrt(itL->second) - sqrt(itR->second)));
//cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
//cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl;
//cout << i << " score = " << pref[i].score[1] << endl;
}
//cout << "pref[" << i << "].closestLeft[1] = " << pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl;
//are you the closest left sequence
- if (itL->second < pref[i].closestLeft[1]) {
+ if (itL->second < pref[count][i].closestLeft) {
- pref[i].closestLeft[1] = itL->second;
- pref[i].leftParent[1] = seqs[j]->getName();
+ pref[count][i].closestLeft = itL->second;
+ pref[count][i].leftParent = seqs[j]->getName();
//cout << "updating closest left to " << pref[i].leftParent[1] << endl;
}
//cout << "pref[" << j << "].closestLeft[1] = " << pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;
- if (itL->second < pref[j].closestLeft[1]) {
- pref[j].closestLeft[1] = itL->second;
- pref[j].leftParent[1] = seqs[i]->getName();
+ if (itL->second < pref[count][j].closestLeft) {
+ pref[count][j].closestLeft = itL->second;
+ pref[count][j].leftParent = seqs[i]->getName();
//cout << "updating closest left to " << pref[j].leftParent[1] << endl;
}
//are you the closest right sequence
- if (itR->second < pref[i].closestRight[1]) {
- pref[i].closestRight[1] = itR->second;
- pref[i].rightParent[1] = seqs[j]->getName();
+ if (itR->second < pref[count][i].closestRight) {
+ pref[count][i].closestRight = itR->second;
+ pref[count][i].rightParent = seqs[j]->getName();
}
- if (itR->second < pref[j].closestRight[1]) {
- pref[j].closestRight[1] = itR->second;
- pref[j].rightParent[1] = seqs[i]->getName();
+ if (itR->second < pref[count][j].closestRight) {
+ pref[count][j].closestRight = itR->second;
+ pref[count][j].rightParent = seqs[i]->getName();
}
}
}
+
+ return 1;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bellerophon", "generatePreferences");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector<Preference> Bellerophon::getBestPref() {
+ try {
+
+ vector<Preference> best;
-
- //calculate the dme
- int count0 = 0;
- for (int i = 0; i < pref.size(); i++) { dme += pref[i].score[1]; if (pref[i].score[1] == 0.0) { count0++; } }
+ //for each sequence
+ for (int i = 0; i < numSeqs; i++) {
+
+ //set best pref score to first one
+ Preference temp = pref[0][i];
+
+ if (m->control_pressed) { return best; }
+
+ //for each window
+ for (int j = 1; j < pref.size(); j++) {
+
+ //is this a better score
+ if (pref[j][i].score > temp.score) { temp = pref[j][i]; }
+ }
+
+ best.push_back(temp);
+ }
+
+ //rank preference score to eachother
+ float dme = 0.0;
+ float expectedPercent = 1 / (float) (best.size());
- float expectedPercent = 1 / (float) (pref.size() - count0);
-//cout << endl << "dme = " << dme << endl;
- //recalculate prefernences based on dme
- for (int i = 0; i < pref.size(); i++) {
-//cout << "unadjusted pref " << i << " = " << pref[i].score[1] << endl;
- // gives the actual percentage of the dme this seq adds
- pref[i].score[1] = pref[i].score[1] / dme;
+ for (int i = 0; i < best.size(); i++) { dme += best[i].score; }
+
+ for (int i = 0; i < best.size(); i++) {
+
+ if (m->control_pressed) { return best; }
+
+ //gives the actual percentage of the dme this seq adds
+ best[i].score = best[i].score / dme;
//how much higher or lower is this than expected
- pref[i].score[1] = pref[i].score[1] / expectedPercent;
+ best[i].score = best[i].score / expectedPercent;
+
+ }
+
+ //sort Preferences highest to lowest
+ sort(best.begin(), best.end(), comparePref);
+
+ return best;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bellerophon", "getBestPref");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int Bellerophon::writePrefs(string file, linePair tempLine) {
+ try {
+
+ ofstream outTemp;
+ openOutputFile(file, outTemp);
+
+ //lets you know what part of the pref matrix you are writing
+ outTemp << tempLine.start << '\t' << tempLine.num << endl;
+
+ for (int i = tempLine.start; i < (tempLine.start + tempLine.num); i++) {
+
+ for (int j = 0; j < numSeqs; j++) {
+
+ if (m->control_pressed) { outTemp.close(); remove(file.c_str()); return 0; }
+
+ outTemp << pref[i][j].name << '\t' << pref[i][j].leftParent << '\t' << pref[i][j].rightParent << '\t';
+ outTemp << pref[i][j].score << '\t' << pref[i][j].closestLeft << '\t' << pref[i][j].closestRight << '\t' << pref[i][j].midpoint << endl;
+ }
+ }
+
+ outTemp.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bellerophon", "writePrefs");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int Bellerophon::readPrefs(string file) {
+ try {
+
+ ifstream inTemp;
+ openInputFile(file, inTemp);
+
+ int start, num;
+
+ //lets you know what part of the pref matrix you are writing
+ inTemp >> start >> num; gobble(inTemp);
+
+ for (int i = start; i < num; i++) {
- //pref[i].score[1] = dme / (dme - 2 * pref[i].score[1]);
+ for (int j = 0; j < numSeqs; j++) {
+
+ if (m->control_pressed) { inTemp.close(); remove(file.c_str()); return 0; }
- //so a non chimeric sequence would be around 1, and a chimeric would be signifigantly higher.
-//cout << "adjusted pref " << i << " = " << pref[i].score[1] << endl;
+ inTemp >> pref[i][j].name >> pref[i][j].leftParent >> pref[i][j].rightParent;
+ inTemp >> pref[i][j].score >> pref[i][j].closestLeft >> pref[i][j].closestRight >> pref[i][j].midpoint;
+ gobble(inTemp);
+ }
}
- //is this score bigger then the last score
- for (int i = 0; i < pref.size(); i++) {
-
- //update biggest score
- if (pref[i].score[1] > pref[i].score[0]) {
- pref[i].score[0] = pref[i].score[1];
- pref[i].leftParent[0] = pref[i].leftParent[1];
- pref[i].rightParent[0] = pref[i].rightParent[1];
- pref[i].closestLeft[0] = pref[i].closestLeft[1];
- pref[i].closestRight[0] = pref[i].closestRight[1];
- pref[i].midpoint = mid;
+ inTemp.close();
+
+ remove(file.c_str());
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bellerophon", "writePrefs");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector<string> Bellerophon::getBestWindow(linePair line) {
+ try {
+
+ vector<string> best;
+
+ //for each sequence
+ for (int i = 0; i < numSeqs; i++) {
+
+ //set best pref score to first one
+ Preference temp = pref[line.start][i];
+
+ if (m->control_pressed) { return best; }
+
+ //for each window
+ for (int j = (line.start+1); j < (line.start+line.num); j++) {
+
+ //is this a better score
+ if (pref[j][i].score > temp.score) { temp = pref[j][i]; }
}
+ string tempString = temp.name + '\t' + temp.leftParent + '\t' + temp.rightParent + '\t' + toString(temp.score);
+ best.push_back(tempString);
}
+ return best;
+
}
catch(exception& e) {
- errorOut(e, "Bellerophon", "generatePreferences");
+ m->errorOut(e, "Bellerophon", "getBestWindow");
exit(1);
}
}
/**************************************************************************************************/
+int Bellerophon::fillPref(int process, vector<string>& best) {
+ try {
+ //figure out where you start so you can put the best scores there
+ int numSeqsPerProcessor = iters / processors;
+ int start = process * numSeqsPerProcessor;
+
+ for (int i = 0; i < best.size(); i++) {
+
+ if (m->control_pressed) { return 0; }
+
+ istringstream iss (best[i],istringstream::in);
+
+ string tempScore;
+ iss >> pref[start][i].name >> pref[start][i].leftParent >> pref[start][i].rightParent >> tempScore;
+ convert(tempScore, pref[start][i].score);
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bellerophon", "fillPref");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/