]> git.donarmstrong.com Git - mothur.git/blob - bellerophon.cpp
e33230ed8b9949550bf716cb82e4c7bf6a6353ae
[mothur.git] / bellerophon.cpp
1 /*
2  *  bellerophon.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "bellerophon.h"
11 #include "eachgapdist.h"
12 #include "ignoregaps.h"
13 #include "onegapdist.h"
14
15
16 /***************************************************************************************************************/
17
18 Bellerophon::Bellerophon(string name, bool filterSeqs,  bool c, int win, int inc, int p, string o) : Chimera() {
19         try {
20                 fastafile = name;
21                 correction = c;
22                 outputDir = o;
23                 window = win;
24                 increment = inc;
25                 processors = p;
26                 
27                 //read in sequences
28                 seqs = readSeqs(fastafile);
29                 numSeqs = seqs.size();
30                 if (numSeqs == 0) { m->mothurOut("Error in reading you sequences."); m->mothurOutEndLine(); exit(1); }
31         
32                 //do soft filter
33                 if (filterSeqs)  {
34                         createFilter(seqs, 0.5);
35                         for (int i = 0; i < seqs.size(); i++) {  runFilter(seqs[i]);  }
36                 }
37                 
38                 distCalculator = new eachGapDist();
39                 
40                 //set default window to 25% of sequence length
41                 string seq0 = seqs[0]->getAligned();
42                 if (window == 0) { window = seq0.length() / 4;  }
43                 else if (window > (seq0.length() / 2)) {  
44                         m->mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); m->mothurOutEndLine();
45                         window = (seq0.length() / 2);
46                 }
47                 
48                 if (increment > (seqs[0]->getAlignLength() - (2*window))) { 
49                         if (increment != 10) {
50                         
51                                 m->mothurOut("You have selected a increment that is too large. I will use the default."); m->mothurOutEndLine();
52                                 increment = 10;
53                                 if (increment > (seqs[0]->getAlignLength() - (2*window))) {  increment = 0;  }
54                                 
55                         }else{ increment = 0; }
56                 }
57                 
58                 if (increment == 0) { iters = 1; }
59                 else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); }
60                 
61                 //initialize pref
62                 pref.resize(iters);
63                 for (int i = 0; i < iters; i++) { 
64                         Preference temp;
65                         for (int j = 0; j < numSeqs; j++) {  
66                                 pref[i].push_back(temp); 
67                         }
68                 } 
69
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "Bellerophon", "Bellerophon");
73                 exit(1);
74         }
75 }
76
77 //***************************************************************************************************************
78 int Bellerophon::print(ostream& out, ostream& outAcc) {
79         try {
80                 int above1 = 0;
81                 
82                 //sorted "best" preference scores for all seqs
83                 vector<Preference> best = getBestPref();
84                 
85                 if (m->control_pressed) { return numSeqs; }
86                 
87                 out << "Name\tScore\tLeft\tRight\t" << endl;
88                 //output prefenence structure to .chimeras file
89                 for (int i = 0; i < best.size(); i++) {
90                         
91                         if (m->control_pressed) {  return numSeqs; }
92                         
93                         out << best[i].name << '\t' << setprecision(3) << best[i].score << '\t' << best[i].leftParent << '\t' << best[i].rightParent << endl;
94                         
95                         //calc # of seqs with preference above 1.0
96                         if (best[i].score > 1.0) { 
97                                 above1++; 
98                                 outAcc << best[i].name << endl;
99                                 m->mothurOut(best[i].name + " is a suspected chimera at breakpoint " + toString(best[i].midpoint)); m->mothurOutEndLine();
100                                 m->mothurOut("It's score is " + toString(best[i].score) + " with suspected left parent " + best[i].leftParent + " and right parent " + best[i].rightParent); m->mothurOutEndLine();
101                         }
102                 }
103                 
104                 //output results to screen
105                 m->mothurOutEndLine();
106                 m->mothurOut("Sequence with preference score above 1.0: " + toString(above1)); m->mothurOutEndLine();
107                 int spot;
108                 spot = best.size()-1;
109                 m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
110                 spot = best.size() * 0.975;
111                 m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
112                 spot = best.size() * 0.75;
113                 m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
114                 spot = best.size() * 0.50;
115                 m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine();
116                 spot = best.size() * 0.25;
117                 m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
118                 spot = best.size() * 0.025;
119                 m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
120                 spot = 0;
121                 m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
122                 
123                 return numSeqs;
124
125         }
126         catch(exception& e) {
127                 m->errorOut(e, "Bellerophon", "print");
128                 exit(1);
129         }
130 }
131 #ifdef USE_MPI
132 //***************************************************************************************************************
133 int Bellerophon::print(MPI_File& out, MPI_File& outAcc) {
134         try {
135                 
136                 int pid;
137                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
138                 
139                 if (pid == 0) {
140                         string outString = "";
141                                                 
142                         //sorted "best" preference scores for all seqs
143                         vector<Preference> best = getBestPref();
144                         
145                         int above1 = 0;
146                         int ninetyfive = best.size() * 0.05;
147                         float cutoffScore = best[ninetyfive].score;
148
149                         if (m->control_pressed) { return numSeqs; }
150                         
151                         outString += "Name\tScore\tLeft\tRight\n";
152                         //output prefenence structure to .chimeras file
153                         for (int i = 0; i < best.size(); i++) {
154                                 
155                                 if (m->control_pressed) {  return numSeqs; }
156                                 
157                                 outString += best[i].name + "\t" +  toString(best[i].score) + "\t" + best[i].leftParent + "\t" + best[i].rightParent + "\n";
158                                 
159                                 MPI_Status status;
160                                 int length = outString.length();
161                                 char* buf2 = new char[length];\r
162                                 memcpy(buf2, outString.c_str(), length);
163                                         
164                                 MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
165                                 
166                                 delete buf2;
167                                 
168                                 //calc # of seqs with preference above 95%tile
169                                 if (best[i].score >= cutoffScore) { 
170                                         above1++; 
171                                         string outAccString;
172                                          outAccString += best[i].name + "\n";
173                                         
174                                         MPI_Status statusAcc;
175                                         length = outAccString.length();
176                                         char* buf = new char[length];\r
177                                         memcpy(buf, outAccString.c_str(), length);
178                                         
179                                         MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
180                                         
181                                         delete buf;
182
183                                         cout << best[i].name << " is a suspected chimera at breakpoint " << toString(best[i].midpoint) << endl;
184                                         cout << "It's score is " << toString(best[i].score) << " with suspected left parent " << best[i].leftParent << " and right parent " << best[i].rightParent << endl;
185                                 }
186                         }
187                         
188                         //output results to screen
189                         m->mothurOutEndLine();
190                         m->mothurOut("Sequence with preference score above " + toString(cutoffScore) +  ": " + toString(above1)); m->mothurOutEndLine();
191                         int spot;
192                         spot = best.size()-1;
193                         m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
194                         spot = best.size() * 0.975;
195                         m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
196                         spot = best.size() * 0.75;
197                         m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
198                         spot = best.size() * 0.50;
199                         m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine();
200                         spot = best.size() * 0.25;
201                         m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
202                         spot = best.size() * 0.025;
203                         m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
204                         spot = 0;
205                         m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
206                         
207                 }
208                 
209                 return numSeqs;
210                 
211         }
212         catch(exception& e) {
213                 m->errorOut(e, "Bellerophon", "print");
214                 exit(1);
215         }
216 }
217 #endif
218 //********************************************************************************************************************
219 //sorts highest score to lowest
220 inline bool comparePref(Preference left, Preference right){
221         return (left.score > right.score);      
222 }
223 //***************************************************************************************************************
224 int Bellerophon::getChimeras() {
225         try {
226                 
227                 //create breaking points
228                 vector<int> midpoints;   midpoints.resize(iters, window);
229                 for (int i = 1; i < iters; i++) {  midpoints[i] = midpoints[i-1] + increment;  }
230         
231         #ifdef USE_MPI
232                 int pid, numSeqsPerProcessor; 
233         
234                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
235                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
236                 
237                 numSeqsPerProcessor = iters / processors;
238                 
239                 //each process hits this only once
240                 int startPos = pid * numSeqsPerProcessor;
241                 if(pid == processors - 1){
242                                 numSeqsPerProcessor = iters - pid * numSeqsPerProcessor;
243                 }
244                 lines.push_back(linePair(startPos, numSeqsPerProcessor));
245                 
246                 //fill pref with scores
247                 driverChimeras(midpoints, lines[0]);
248                 
249                 if (m->control_pressed) { return 0; }
250                                 
251                 //each process must send its parts back to pid 0
252                 if (pid == 0) {
253                         
254                         //receive results 
255                         for (int j = 1; j < processors; j++) {
256                                 
257                                 vector<string>  MPIBestSend; 
258                                 for (int i = 0; i < numSeqs; i++) {
259                                 
260                                         if (m->control_pressed) { return 0; }
261
262                                         MPI_Status status;
263                                         //receive string
264                                         int length;
265                                         MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status);
266                                         
267                                         char* buf = new char[length];
268                                         MPI_Recv(&buf, length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status);
269                                         
270                                         string temp = buf;
271                                         if (temp.length() > length) { temp = temp.substr(0, length); }
272                                         delete buf;
273
274                                         MPIBestSend.push_back(temp);
275                                 }
276                                 
277                                 fillPref(j, MPIBestSend);
278                                 
279                                 if (m->control_pressed) { return 0; }
280                         }
281
282                 }else {
283                         //takes best window for each sequence and turns Preference to string that can be parsed by pid 0.
284                         //played with this a bit, but it may be better to try user-defined datatypes with set string lengths??
285                         vector<string> MPIBestSend = getBestWindow(lines[0]);
286                         pref.clear();
287                         
288                         //send your result to parent
289                         for (int i = 0; i < numSeqs; i++) {
290                                 
291                                 if (m->control_pressed) { return 0; }
292                                 
293                                 int bestLength = MPIBestSend[i].length();
294                                 char* buf = new char[bestLength];\r
295                                 memcpy(buf, MPIBestSend[i].c_str(), bestLength);
296                                 
297                                 MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD);
298                                 MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD);
299                                 delete buf;
300                         }
301                         
302                         MPIBestSend.clear();
303                 }
304                 
305         #else
306         
307                 //divide breakpoints between processors
308                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
309                         if(processors == 1){ 
310                                 lines.push_back(linePair(0, iters));    
311                                 
312                                 //fill pref with scores
313                                 driverChimeras(midpoints, lines[0]);
314         
315                         }else{
316                         
317                                 int numSeqsPerProcessor = iters / processors;
318                                 
319                                 for (int i = 0; i < processors; i++) {
320                                         int startPos = i * numSeqsPerProcessor;
321                                         if(i == processors - 1){
322                                                 numSeqsPerProcessor = iters - i * numSeqsPerProcessor;
323                                         }
324                                         lines.push_back(linePair(startPos, numSeqsPerProcessor));
325                                 }
326                                 
327                                 createProcesses(midpoints);
328                         }
329                 #else
330                         lines.push_back(linePair(0, iters));    
331                         
332                         ///fill pref with scores
333                         driverChimeras(midpoints, lines[0]);
334                 #endif
335         
336         #endif
337         
338                 return 0;
339                 
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "Bellerophon", "getChimeras");
343                 exit(1);
344         }
345 }
346 /**************************************************************************************************/
347
348 int Bellerophon::createProcesses(vector<int> mid) {
349         try {
350 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
351                 int process = 0;
352                 int exitCommand = 1;
353                 vector<int> processIDS;
354                                 
355                 //loop through and create all the processes you want
356                 while (process != processors) {
357                         int pid = fork();
358                         
359                         if (pid > 0) {
360                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
361                                 process++;
362                         }else if (pid == 0){
363                                 exitCommand = driverChimeras(mid, lines[process]);
364                                 string tempOut = outputDir + toString(getpid()) + ".temp";
365                                 writePrefs(tempOut, lines[process]);
366                                 exit(0);
367                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
368                 }
369                 
370                 //force parent to wait until all the processes are done
371                 for (int i=0;i<processors;i++) { 
372                         int temp = processIDS[i];
373                         wait(&temp);
374                 }
375                 
376                 //get info that other processes created
377                 for (int i = 0; i < processIDS.size(); i++) {
378                         string tempIn = outputDir + toString(processIDS[i]) + ".temp";
379                         readPrefs(tempIn);
380                 }
381                 
382                 return exitCommand;
383 #endif          
384         }
385         catch(exception& e) {
386                 m->errorOut(e, "AlignCommand", "createProcesses");
387                 exit(1);
388         }
389 }
390 //***************************************************************************************************************
391 int Bellerophon::driverChimeras(vector<int> midpoints, linePair line) {
392         try {
393                 
394                 for (int h = line.start; h < (line.start + line.num); h++) {
395                         count = h;
396                         int midpoint = midpoints[h];
397                 
398                         //initialize pref[count]                
399                         for (int i = 0; i < numSeqs; i++ ) { 
400                                 pref[count][i].name = seqs[i]->getName();
401                                 pref[count][i].midpoint = midpoint;  
402                         }
403                         
404                         if (m->control_pressed) { return 0; }
405                         
406                         //create 2 vectors of sequences, 1 for left side and one for right side
407                         vector<Sequence> left;  vector<Sequence> right;
408                         
409                         for (int i = 0; i < seqs.size(); i++) {
410                                 
411                                 if (m->control_pressed) { return 0; }
412                                 
413                                 //cout << "midpoint = " << midpoint << "\twindow = " << window << endl;
414                                 //cout << "whole = " << seqs[i]->getAligned().length() << endl;
415                                 //save left side
416                                 string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window);
417                                 Sequence tempLeft;
418                                 tempLeft.setName(seqs[i]->getName());
419                                 tempLeft.setAligned(seqLeft);
420                                 left.push_back(tempLeft);
421                                 //cout << "left = " << tempLeft.getAligned().length() << endl;                  
422                                 //save right side
423                                 string seqRight = seqs[i]->getAligned().substr(midpoint, window);
424                                 Sequence tempRight;
425                                 tempRight.setName(seqs[i]->getName());
426                                 tempRight.setAligned(seqRight);
427                                 right.push_back(tempRight);
428                                 //cout << "right = " << seqRight.length() << endl;      
429                         }
430                         
431                         //this should be parallelized
432                         //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
433                         //create a matrix containing the distance from left to left and right to right
434                         //calculate distances
435                         SparseMatrix* SparseLeft = new SparseMatrix();
436                         SparseMatrix* SparseRight = new SparseMatrix();
437                         
438                         createSparseMatrix(0, left.size(), SparseLeft, left);
439                         
440                         if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
441                         
442                         createSparseMatrix(0, right.size(), SparseRight, right);
443                         
444                         if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
445                         
446                         left.clear(); right.clear();
447                         vector<SeqMap> distMapRight;
448                         vector<SeqMap> distMapLeft;
449                         
450                         // Create a data structure to quickly access the distance information.
451                         //this is from thallingers reimplementation on get.oturep
452                         // It consists of a vector of distance maps, where each map contains
453                         // all distances of a certain sequence. Vector and maps are accessed
454                         // via the index of a sequence in the distance matrix
455                         distMapRight = vector<SeqMap>(numSeqs); 
456                         distMapLeft = vector<SeqMap>(numSeqs); 
457                         //cout << "left" << endl << endl;
458                         for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
459                                 distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
460                                 if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
461                                 //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
462                         }
463                         //cout << "right" << endl << endl;
464                         for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
465                                 distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
466                                 if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
467                                 //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
468                         }
469                         
470                         delete SparseLeft;
471                         delete SparseRight;
472                         
473                         //fill preference structure
474                         generatePreferences(distMapLeft, distMapRight, midpoint);
475                         
476                         if (m->control_pressed) { return 0; }
477                         
478                         //report progress
479                         if((h+1) % 10 == 0){    cout << "Processing sliding window: " << toString(h+1) <<  "\n";  m->mothurOutJustToLog("Processing sliding window: " + toString(h+1) + "\n") ;         }
480                         
481                 }
482                 
483                 //report progress
484                 if((line.start + line.num) % 10 != 0){  cout << "Processing sliding window: " << toString(line.start + line.num) <<  "\n";  m->mothurOutJustToLog("Processing sliding window: " + toString(line.start + line.num) + "\n") ;             }
485
486                 return 0;
487                 
488         }
489         catch(exception& e) {
490                 m->errorOut(e, "Bellerophon", "driverChimeras");
491                 exit(1);
492         }
493 }
494
495 /***************************************************************************************************************/
496 int Bellerophon::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector<Sequence> s){
497         try {
498
499                 for(int i=startSeq; i<endSeq; i++){
500                         
501                         for(int j=0;j<i;j++){
502                                 
503                                 if (m->control_pressed) { return 0; }
504                         
505                                 distCalculator->calcDist(s[i], s[j]);
506                                 float dist = distCalculator->getDist();
507                         
508                                 PCell temp(i, j, dist);
509                                 sparse->addCell(temp);
510                                 
511                         }
512                 }
513                 
514                 return 1;
515         }
516         catch(exception& e) {
517                 m->errorOut(e, "Bellerophon", "createSparseMatrix");
518                 exit(1);
519         }
520 }
521 /***************************************************************************************************************/
522 int Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
523         try {
524                 
525                 float dme = 0.0;
526                 SeqMap::iterator itR;
527                 SeqMap::iterator itL;
528                 
529                 for (int i = 0; i < left.size(); i++) {
530                         
531                         SeqMap currentLeft = left[i];    //example i = 3;   currentLeft is a map of 0 to the distance of sequence 3 to sequence 0,
532                                                                                                 //                                                                              1 to the distance of sequence 3 to sequence 1,
533                                                                                                 //                                                                              2 to the distance of sequence 3 to sequence 2.
534                         SeqMap currentRight = right[i];         // same as left but with distances on the right side.
535                         
536                         for (int j = 0; j < i; j++) {
537                         
538                                 if (m->control_pressed) {  return 0; }
539                                 
540                                 itL = currentLeft.find(j);
541                                 itR = currentRight.find(j);
542 //cout << " i = " << i << " j = " << j << " distLeft = " << itL->second << endl;
543 //cout << " i = " << i << " j = " << j << " distright = " << itR->second << endl;
544                                 
545                                 //if you can find this entry update the preferences
546                                 if ((itL != currentLeft.end()) && (itR != currentRight.end())) {
547                                 
548                                         if (!correction) {
549                                                 pref[count][i].score += abs((itL->second - itR->second));
550                                                 pref[count][j].score += abs((itL->second - itR->second));
551 //cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
552 //cout << "abs = " << abs((itL->second - itR->second)) << endl;
553 //cout << i << " score = " << pref[i].score[1] << endl;
554 //cout << j << " score = " << pref[j].score[1] << endl;
555                                         }else {
556                                                 pref[count][i].score += abs((sqrt(itL->second) - sqrt(itR->second)));
557                                                 pref[count][j].score += abs((sqrt(itL->second) - sqrt(itR->second)));
558 //cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
559 //cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl;
560 //cout << i << " score = " << pref[i].score[1] << endl;
561 //cout << j << " score = " << pref[j].score[1] << endl;
562                                         }
563 //cout << "pref[" << i << "].closestLeft[1] = " <<      pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl;                        
564                                         //are you the closest left sequence
565                                         if (itL->second < pref[count][i].closestLeft) {  
566
567                                                 pref[count][i].closestLeft = itL->second;
568                                                 pref[count][i].leftParent = seqs[j]->getName();
569 //cout << "updating closest left to " << pref[i].leftParent[1] << endl;
570                                         }
571 //cout << "pref[" << j << "].closestLeft[1] = " <<      pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;        
572                                         if (itL->second < pref[count][j].closestLeft) { 
573                                                 pref[count][j].closestLeft = itL->second;
574                                                 pref[count][j].leftParent = seqs[i]->getName();
575 //cout << "updating closest left to " << pref[j].leftParent[1] << endl;
576                                         }
577                                         
578                                         //are you the closest right sequence
579                                         if (itR->second < pref[count][i].closestRight) {   
580                                                 pref[count][i].closestRight = itR->second;
581                                                 pref[count][i].rightParent = seqs[j]->getName();
582                                         }
583                                         if (itR->second < pref[count][j].closestRight) {   
584                                                 pref[count][j].closestRight = itR->second;
585                                                 pref[count][j].rightParent = seqs[i]->getName();
586                                         }
587                                         
588                                 }
589                         }
590                 
591                 }
592                 
593                                 
594                 return 1;
595
596         }
597         catch(exception& e) {
598                 m->errorOut(e, "Bellerophon", "generatePreferences");
599                 exit(1);
600         }
601 }
602 /**************************************************************************************************/
603 vector<Preference> Bellerophon::getBestPref() {
604         try {
605                 
606                 vector<Preference> best;
607                 
608                 //for each sequence
609                 for (int i = 0; i < numSeqs; i++) {
610                         
611                         //set best pref score to first one
612                         Preference temp = pref[0][i];
613                         
614                         if (m->control_pressed) { return best;  }
615                         
616                         //for each window
617                         for (int j = 1; j < pref.size(); j++) {
618                                 
619                                 //is this a better score
620                                 if (pref[j][i].score > temp.score) {    temp = pref[j][i];              }
621                         }
622                         
623                         best.push_back(temp);
624                 }
625                 
626                 //rank preference score to eachother
627                 float dme = 0.0;
628                 float expectedPercent = 1 / (float) (best.size());
629                 
630                 for (int i = 0; i < best.size(); i++) {  dme += best[i].score;  }
631         
632                 for (int i = 0; i < best.size(); i++) {
633
634                         if (m->control_pressed) { return best; }
635                         
636                         //gives the actual percentage of the dme this seq adds
637                         best[i].score = best[i].score / dme;
638                         
639                         //how much higher or lower is this than expected
640                         best[i].score = best[i].score / expectedPercent;
641                 
642                 }
643                 
644                 //sort Preferences highest to lowest
645                 sort(best.begin(), best.end(), comparePref);
646
647                 return best;
648         }
649         catch(exception& e) {
650                 m->errorOut(e, "Bellerophon", "getBestPref");
651                 exit(1);
652         }
653 }
654 /**************************************************************************************************/
655 int Bellerophon::writePrefs(string file, linePair tempLine) {
656         try {
657         
658                 ofstream outTemp;
659                 openOutputFile(file, outTemp);
660                 
661                 //lets you know what part of the pref matrix you are writing
662                 outTemp << tempLine.start << '\t' << tempLine.num << endl;
663                 
664                 for (int i = tempLine.start; i < (tempLine.start + tempLine.num); i++) {
665                         
666                         for (int j = 0; j < numSeqs; j++) {
667                                 
668                                 if (m->control_pressed) { outTemp.close(); remove(file.c_str()); return 0; }
669                                 
670                                 outTemp << pref[i][j].name << '\t' << pref[i][j].leftParent << '\t' << pref[i][j].rightParent << '\t';
671                                 outTemp << pref[i][j].score << '\t' << pref[i][j].closestLeft << '\t' << pref[i][j].closestRight << '\t' << pref[i][j].midpoint <<  endl;
672                         }
673                 }
674                 
675                 outTemp.close();
676                 
677                 return 0;
678         }
679         catch(exception& e) {
680                 m->errorOut(e, "Bellerophon", "writePrefs");
681                 exit(1);
682         }
683 }
684 /**************************************************************************************************/
685 int Bellerophon::readPrefs(string file) {
686         try {
687         
688                 ifstream inTemp;
689                 openInputFile(file, inTemp);
690                 
691                 int start, num;
692                 
693                 //lets you know what part of the pref matrix you are writing
694                 inTemp >> start >> num;  gobble(inTemp);
695                 
696                 for (int i = start; i < num; i++) {
697                         
698                         for (int j = 0; j < numSeqs; j++) {
699                                 
700                                 if (m->control_pressed) { inTemp.close(); remove(file.c_str()); return 0; }
701                         
702                                 inTemp >> pref[i][j].name >> pref[i][j].leftParent >> pref[i][j].rightParent;
703                                 inTemp >> pref[i][j].score >> pref[i][j].closestLeft >> pref[i][j].closestRight >> pref[i][j].midpoint;
704                                 gobble(inTemp);
705                         }
706                 }
707                 
708                 inTemp.close();
709                 
710                 remove(file.c_str());
711                 
712                 return 0;
713         }
714         catch(exception& e) {
715                 m->errorOut(e, "Bellerophon", "writePrefs");
716                 exit(1);
717         }
718 }
719 /**************************************************************************************************/
720 vector<string> Bellerophon::getBestWindow(linePair line) {
721         try {
722         
723                 vector<string> best;
724                         
725                 //for each sequence
726                 for (int i = 0; i < numSeqs; i++) {
727                         
728                         //set best pref score to first one
729                         Preference temp = pref[line.start][i];
730                         
731                         if (m->control_pressed) { return best;  }
732                         
733                         //for each window
734                         for (int j = (line.start+1); j < (line.start+line.num); j++) {
735                                 
736                                 //is this a better score
737                                 if (pref[j][i].score > temp.score) {    temp = pref[j][i];              }
738                         }
739                         
740                         string tempString = temp.name + '\t' + temp.leftParent + '\t' + temp.rightParent + '\t' + toString(temp.score);
741                         best.push_back(tempString);
742                 }
743
744                 return best;
745         
746         }
747         catch(exception& e) {
748                 m->errorOut(e, "Bellerophon", "getBestWindow");
749                 exit(1);
750         }
751 }
752 /**************************************************************************************************/
753 int Bellerophon::fillPref(int process, vector<string>& best) {
754         try {
755                 //figure out where you start so you can put the best scores there
756                 int numSeqsPerProcessor = iters / processors;
757                 int start = process * numSeqsPerProcessor;
758                 
759                 for (int i = 0; i < best.size(); i++) {
760                 
761                         if (m->control_pressed) { return 0;  }
762                         
763                         istringstream iss (best[i],istringstream::in);
764                         
765                         string tempScore;
766                         iss >> pref[start][i].name >> pref[start][i].leftParent >> pref[start][i].rightParent >> tempScore;
767                         convert(tempScore, pref[start][i].score); 
768                 }
769
770                 return 0;
771         }
772         catch(exception& e) {
773                 m->errorOut(e, "Bellerophon", "fillPref");
774                 exit(1);
775         }
776 }
777
778 /**************************************************************************************************/
779