]> git.donarmstrong.com Git - mothur.git/blob - bellerophon.cpp
changes while testing
[mothur.git] / bellerophon.cpp
1 /*
2  *  bellerophon.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "bellerophon.h"
11 #include "eachgapdist.h"
12 #include "ignoregaps.h"
13 #include "onegapdist.h"
14
15
16 /***************************************************************************************************************/
17
18 Bellerophon::Bellerophon(string name, bool filterSeqs,  bool c, int win, int inc, int p, string o) : Chimera() {
19         try {
20                 fastafile = name;
21                 correction = c;
22                 outputDir = o;
23                 window = win;
24                 increment = inc;
25                 processors = p;
26                 
27                 //read in sequences
28                 seqs = readSeqs(fastafile);
29                 numSeqs = seqs.size();
30                 if (numSeqs == 0) { m->mothurOut("Error in reading you sequences."); m->mothurOutEndLine(); exit(1); }
31         
32                 //do soft filter
33                 if (filterSeqs)  {
34                         createFilter(seqs, 0.5);
35                         for (int i = 0; i < seqs.size(); i++) {  runFilter(seqs[i]);  }
36                 }
37                 
38                 distCalculator = new eachGapDist();
39                 
40                 //set default window to 25% of sequence length
41                 string seq0 = seqs[0]->getAligned();
42                 if (window == 0) { window = seq0.length() / 4;  }
43                 else if (window > (seq0.length() / 2)) {  
44                         m->mothurOut("Your sequence length is = " + toString(seq0.length()) + ". You have selected a window size greater than the length of half your aligned sequence. I will run it with a window size of " + toString((seq0.length() / 2))); m->mothurOutEndLine();
45                         window = (seq0.length() / 2);
46                 }
47                 
48                 if (increment > (seqs[0]->getAlignLength() - (2*window))) { 
49                         if (increment != 10) {
50                         
51                                 m->mothurOut("You have selected a increment that is too large. I will use the default."); m->mothurOutEndLine();
52                                 increment = 10;
53                                 if (increment > (seqs[0]->getAlignLength() - (2*window))) {  increment = 0;  }
54                                 
55                         }else{ increment = 0; }
56                 }
57                 
58                 if (increment == 0) { iters = 1; }
59                 else { iters = ((seqs[0]->getAlignLength() - (2*window)) / increment); }
60                 
61                 //initialize pref
62                 pref.resize(iters);
63                 for (int i = 0; i < iters; i++) { 
64                         Preference temp;
65                         for (int j = 0; j < numSeqs; j++) {  
66                                 pref[i].push_back(temp); 
67                         }
68                 } 
69
70         }
71         catch(exception& e) {
72                 m->errorOut(e, "Bellerophon", "Bellerophon");
73                 exit(1);
74         }
75 }
76
77 //***************************************************************************************************************
78 int Bellerophon::print(ostream& out, ostream& outAcc, string s) {
79         try {
80                 int above1 = 0;
81                 
82                 //sorted "best" preference scores for all seqs
83                 vector<Preference> best = getBestPref();
84                 
85                 if (m->control_pressed) { return numSeqs; }
86                 
87                 out << "Name\tScore\tLeft\tRight\t" << endl;
88                 //output prefenence structure to .chimeras file
89                 for (int i = 0; i < best.size(); i++) {
90                         
91                         if (m->control_pressed) {  return numSeqs; }
92                         
93                         out << best[i].name << '\t' << setprecision(3) << best[i].score << '\t' << best[i].leftParent << '\t' << best[i].rightParent << endl;
94                         
95                         //calc # of seqs with preference above 1.0
96                         if (best[i].score > 1.0) { 
97                                 above1++; 
98                                 outAcc << best[i].name << endl;
99                                 m->mothurOut(best[i].name + " is a suspected chimera at breakpoint " + toString(best[i].midpoint)); m->mothurOutEndLine();
100                                 m->mothurOut("It's score is " + toString(best[i].score) + " with suspected left parent " + best[i].leftParent + " and right parent " + best[i].rightParent); m->mothurOutEndLine();
101                         }
102                 }
103                 
104                 //output results to screen
105                 m->mothurOutEndLine();
106                 m->mothurOut("Sequence with preference score above 1.0: " + toString(above1)); m->mothurOutEndLine();
107                 int spot;
108                 spot = best.size()-1;
109                 m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
110                 spot = best.size() * 0.975;
111                 m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
112                 spot = best.size() * 0.75;
113                 m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
114                 spot = best.size() * 0.50;
115                 m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine();
116                 spot = best.size() * 0.25;
117                 m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
118                 spot = best.size() * 0.025;
119                 m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
120                 spot = 0;
121                 m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
122                 
123                 return numSeqs;
124
125         }
126         catch(exception& e) {
127                 m->errorOut(e, "Bellerophon", "print");
128                 exit(1);
129         }
130 }
131 #ifdef USE_MPI
132 //***************************************************************************************************************
133 int Bellerophon::print(MPI_File& out, MPI_File& outAcc, string s) {
134         try {
135         
136                 int pid;
137                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
138                 
139                 if (pid == 0) {
140                         string outString = "";
141                                                 
142                         //sorted "best" preference scores for all seqs
143                         vector<Preference> best = getBestPref();
144                         
145                         int above1 = 0;
146                         int ninetyfive = best.size() * 0.05;
147                         float cutoffScore = best[ninetyfive].score;
148
149                         if (m->control_pressed) { return numSeqs; }
150                         
151                         outString = "Name\tScore\tLeft\tRight\n";
152                         MPI_Status status;
153                         int olength = outString.length();
154                         char* buf5 = new char[olength];
155                         memcpy(buf5, outString.c_str(), olength);
156                                         
157                         MPI_File_write_shared(out, buf5, olength, MPI_CHAR, &status);
158                         
159                         delete buf5;
160
161                         //output prefenence structure to .chimeras file
162                         for (int i = 0; i < best.size(); i++) {
163                                 
164                                 if (m->control_pressed) {  return numSeqs; }
165                                 
166                                 outString = best[i].name + "\t" +  toString(best[i].score) + "\t" + best[i].leftParent + "\t" + best[i].rightParent + "\n";
167                         
168                                 MPI_Status status;
169                                 int length = outString.length();
170                                 char* buf2 = new char[length];
171                                 memcpy(buf2, outString.c_str(), length);
172                                         
173                                 MPI_File_write_shared(out, buf2, length, MPI_CHAR, &status);
174                                 
175                                 delete buf2;
176                                 
177                                 //calc # of seqs with preference above 95%tile
178                                 if (best[i].score >= cutoffScore) { 
179                                         above1++; 
180                                         string outAccString = "";
181                                          outAccString += best[i].name + "\n";
182                                         
183                                         MPI_Status statusAcc;
184                                         length = outAccString.length();
185                                         char* buf = new char[length];
186                                         memcpy(buf, outAccString.c_str(), length);
187                                         
188                                         MPI_File_write_shared(outAcc, buf, length, MPI_CHAR, &statusAcc);
189                                         
190                                         delete buf;
191
192                                         cout << best[i].name << " is a suspected chimera at breakpoint " << toString(best[i].midpoint) << endl;
193                                         cout << "It's score is " << toString(best[i].score) << " with suspected left parent " << best[i].leftParent << " and right parent " << best[i].rightParent << endl;
194                                 }
195                         }
196                         
197                         //output results to screen
198                         m->mothurOutEndLine();
199                         m->mothurOut("Sequence with preference score above " + toString(cutoffScore) +  ": " + toString(above1)); m->mothurOutEndLine();
200                         int spot;
201                         spot = best.size()-1;
202                         m->mothurOut("Minimum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
203                         spot = best.size() * 0.975;
204                         m->mothurOut("2.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
205                         spot = best.size() * 0.75;
206                         m->mothurOut("25%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
207                         spot = best.size() * 0.50;
208                         m->mothurOut("Median: \t" + toString(best[spot].score)); m->mothurOutEndLine();
209                         spot = best.size() * 0.25;
210                         m->mothurOut("75%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
211                         spot = best.size() * 0.025;
212                         m->mothurOut("97.5%-tile:\t" + toString(best[spot].score)); m->mothurOutEndLine();
213                         spot = 0;
214                         m->mothurOut("Maximum:\t" + toString(best[spot].score)); m->mothurOutEndLine();
215                         
216                 }
217                 
218                 return numSeqs;
219                 
220         }
221         catch(exception& e) {
222                 m->errorOut(e, "Bellerophon", "print");
223                 exit(1);
224         }
225 }
226 #endif
227 //********************************************************************************************************************
228 //sorts highest score to lowest
229 inline bool comparePref(Preference left, Preference right){
230         return (left.score > right.score);      
231 }
232 //***************************************************************************************************************
233 int Bellerophon::getChimeras() {
234         try {
235                 
236                 //create breaking points
237                 vector<int> midpoints;   midpoints.resize(iters, window);
238                 for (int i = 1; i < iters; i++) {  midpoints[i] = midpoints[i-1] + increment;  }
239         
240         #ifdef USE_MPI
241                 int pid, numSeqsPerProcessor; 
242         
243                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
244                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
245                 
246                 numSeqsPerProcessor = iters / processors;
247                 
248                 //each process hits this only once
249                 unsigned long long startPos = pid * numSeqsPerProcessor;
250                 if(pid == processors - 1){
251                                 numSeqsPerProcessor = iters - pid * numSeqsPerProcessor;
252                 }
253                 lines.push_back(linePair(startPos, numSeqsPerProcessor));
254                 
255                 //fill pref with scores
256                 driverChimeras(midpoints, lines[0]);
257                 
258                 if (m->control_pressed) { return 0; }
259                                 
260                 //each process must send its parts back to pid 0
261                 if (pid == 0) {
262                         
263                         //receive results 
264                         for (int j = 1; j < processors; j++) {
265                                 
266                                 vector<string>  MPIBestSend; 
267                                 for (int i = 0; i < numSeqs; i++) {
268                                 
269                                         if (m->control_pressed) { return 0; }
270
271                                         MPI_Status status;
272                                         //receive string
273                                         int length;
274                                         MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status);
275                                         
276                                         char* buf = new char[length];
277                                         MPI_Recv(&buf[0], length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status);
278                                         
279                                         string temp = buf;
280                                         if (temp.length() > length) { temp = temp.substr(0, length); }
281                                         delete buf;
282
283                                         MPIBestSend.push_back(temp);
284                                 }
285                                 
286                                 fillPref(j, MPIBestSend);
287                                 
288                                 if (m->control_pressed) { return 0; }
289                         }
290
291                 }else {
292                         //takes best window for each sequence and turns Preference to string that can be parsed by pid 0.
293                         //played with this a bit, but it may be better to try user-defined datatypes with set string lengths??
294                         vector<string> MPIBestSend = getBestWindow(lines[0]);
295                         pref.clear();
296                                 
297                         //send your result to parent
298                         for (int i = 0; i < numSeqs; i++) {
299                                 
300                                 if (m->control_pressed) { return 0; }
301                                 
302                                 int bestLength = MPIBestSend[i].length();
303                                 char* buf = new char[bestLength];
304                                 memcpy(buf, MPIBestSend[i].c_str(), bestLength);
305                                 
306                                 MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD);
307                                 MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD);
308                                 delete buf;
309                         }
310                         
311                         MPIBestSend.clear();
312                 }
313                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
314         #else
315         
316                 //divide breakpoints between processors
317                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
318                         if(processors == 1){ 
319                                 lines.push_back(linePair(0, iters));    
320                                 
321                                 //fill pref with scores
322                                 driverChimeras(midpoints, lines[0]);
323         
324                         }else{
325                         
326                                 int numSeqsPerProcessor = iters / processors;
327                                 
328                                 for (int i = 0; i < processors; i++) {
329                                         unsigned long long startPos = i * numSeqsPerProcessor;
330                                         if(i == processors - 1){
331                                                 numSeqsPerProcessor = iters - i * numSeqsPerProcessor;
332                                         }
333                                         lines.push_back(linePair(startPos, numSeqsPerProcessor));
334                                 }
335                                 
336                                 createProcesses(midpoints);
337                         }
338                 #else
339                         lines.push_back(linePair(0, iters));    
340                         
341                         ///fill pref with scores
342                         driverChimeras(midpoints, lines[0]);
343                 #endif
344         
345         #endif
346         
347                 return 0;
348                 
349         }
350         catch(exception& e) {
351                 m->errorOut(e, "Bellerophon", "getChimeras");
352                 exit(1);
353         }
354 }
355 /**************************************************************************************************/
356
357 int Bellerophon::createProcesses(vector<int> mid) {
358         try {
359 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
360                 int process = 0;
361                 int exitCommand = 1;
362                 vector<int> processIDS;
363                                 
364                 //loop through and create all the processes you want
365                 while (process != processors) {
366                         int pid = fork();
367                         
368                         if (pid > 0) {
369                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
370                                 process++;
371                         }else if (pid == 0){
372                                 exitCommand = driverChimeras(mid, lines[process]);
373                                 string tempOut = outputDir + toString(getpid()) + ".temp";
374                                 writePrefs(tempOut, lines[process]);
375                                 exit(0);
376                         }else { 
377                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
378                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
379                                 exit(0);
380                         }
381                 }
382                 
383                 //force parent to wait until all the processes are done
384                 for (int i=0;i<processors;i++) { 
385                         int temp = processIDS[i];
386                         wait(&temp);
387                 }
388                 
389                 //get info that other processes created
390                 for (int i = 0; i < processIDS.size(); i++) {
391                         string tempIn = outputDir + toString(processIDS[i]) + ".temp";
392                         readPrefs(tempIn);
393                 }
394                 
395                 return exitCommand;
396 #endif          
397         }
398         catch(exception& e) {
399                 m->errorOut(e, "AlignCommand", "createProcesses");
400                 exit(1);
401         }
402 }
403 //***************************************************************************************************************
404 int Bellerophon::driverChimeras(vector<int> midpoints, linePair line) {
405         try {
406                 
407                 for (int h = line.start; h < (line.start + line.num); h++) {
408                         count = h;
409                         int midpoint = midpoints[h];
410                 
411                         //initialize pref[count]                
412                         for (int i = 0; i < numSeqs; i++ ) { 
413                                 pref[count][i].name = seqs[i]->getName();
414                                 pref[count][i].midpoint = midpoint;  
415                         }
416                         
417                         if (m->control_pressed) { return 0; }
418                         
419                         //create 2 vectors of sequences, 1 for left side and one for right side
420                         vector<Sequence> left;  vector<Sequence> right;
421                         
422                         for (int i = 0; i < seqs.size(); i++) {
423                                 
424                                 if (m->control_pressed) { return 0; }
425                                 
426                                 //cout << "midpoint = " << midpoint << "\twindow = " << window << endl;
427                                 //cout << "whole = " << seqs[i]->getAligned().length() << endl;
428                                 //save left side
429                                 string seqLeft = seqs[i]->getAligned().substr(midpoint-window, window);
430                                 Sequence tempLeft;
431                                 tempLeft.setName(seqs[i]->getName());
432                                 tempLeft.setAligned(seqLeft);
433                                 left.push_back(tempLeft);
434                                 //cout << "left = " << tempLeft.getAligned().length() << endl;                  
435                                 //save right side
436                                 string seqRight = seqs[i]->getAligned().substr(midpoint, window);
437                                 Sequence tempRight;
438                                 tempRight.setName(seqs[i]->getName());
439                                 tempRight.setAligned(seqRight);
440                                 right.push_back(tempRight);
441                                 //cout << "right = " << seqRight.length() << endl;      
442                         }
443                         
444                         //this should be parallelized
445                         //perference = sum of (| distance of my left to sequence j's left - distance of my right to sequence j's right | )
446                         //create a matrix containing the distance from left to left and right to right
447                         //calculate distances
448                         SparseMatrix* SparseLeft = new SparseMatrix();
449                         SparseMatrix* SparseRight = new SparseMatrix();
450                         
451                         createSparseMatrix(0, left.size(), SparseLeft, left);
452                         
453                         if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
454                         
455                         createSparseMatrix(0, right.size(), SparseRight, right);
456                         
457                         if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
458                         
459                         left.clear(); right.clear();
460                         vector<SeqMap> distMapRight;
461                         vector<SeqMap> distMapLeft;
462                         
463                         // Create a data structure to quickly access the distance information.
464                         //this is from thallingers reimplementation on get.oturep
465                         // It consists of a vector of distance maps, where each map contains
466                         // all distances of a certain sequence. Vector and maps are accessed
467                         // via the index of a sequence in the distance matrix
468                         distMapRight = vector<SeqMap>(numSeqs); 
469                         distMapLeft = vector<SeqMap>(numSeqs); 
470                         //cout << "left" << endl << endl;
471                         for (MatData currentCell = SparseLeft->begin(); currentCell != SparseLeft->end(); currentCell++) {
472                                 distMapLeft[currentCell->row][currentCell->column] = currentCell->dist;
473                                 if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
474                                 //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
475                         }
476                         //cout << "right" << endl << endl;
477                         for (MatData currentCell = SparseRight->begin(); currentCell != SparseRight->end(); currentCell++) {
478                                 distMapRight[currentCell->row][currentCell->column] = currentCell->dist;
479                                 if (m->control_pressed) { delete SparseLeft; delete SparseRight; return 0; }
480                                 //cout << " i = " << currentCell->row << " j = " << currentCell->column << " dist = " << currentCell->dist << endl;
481                         }
482                         
483                         delete SparseLeft;
484                         delete SparseRight;
485                         
486                         //fill preference structure
487                         generatePreferences(distMapLeft, distMapRight, midpoint);
488                         
489                         if (m->control_pressed) { return 0; }
490                         
491                         //report progress
492                         if((h+1) % 10 == 0){    cout << "Processing sliding window: " << toString(h+1) <<  "\n";  m->mothurOutJustToLog("Processing sliding window: " + toString(h+1) + "\n") ;         }
493                         
494                 }
495                 
496                 //report progress
497                 if((line.start + line.num) % 10 != 0){  cout << "Processing sliding window: " << toString(line.start + line.num) <<  "\n";  m->mothurOutJustToLog("Processing sliding window: " + toString(line.start + line.num) + "\n") ;             }
498
499                 return 0;
500                 
501         }
502         catch(exception& e) {
503                 m->errorOut(e, "Bellerophon", "driverChimeras");
504                 exit(1);
505         }
506 }
507
508 /***************************************************************************************************************/
509 int Bellerophon::createSparseMatrix(int startSeq, int endSeq, SparseMatrix* sparse, vector<Sequence> s){
510         try {
511
512                 for(int i=startSeq; i<endSeq; i++){
513                         
514                         for(int j=0;j<i;j++){
515                                 
516                                 if (m->control_pressed) { return 0; }
517                         
518                                 distCalculator->calcDist(s[i], s[j]);
519                                 float dist = distCalculator->getDist();
520                         
521                                 PCell temp(i, j, dist);
522                                 sparse->addCell(temp);
523                                 
524                         }
525                 }
526                 
527                 return 1;
528         }
529         catch(exception& e) {
530                 m->errorOut(e, "Bellerophon", "createSparseMatrix");
531                 exit(1);
532         }
533 }
534 /***************************************************************************************************************/
535 int Bellerophon::generatePreferences(vector<SeqMap> left, vector<SeqMap> right, int mid){
536         try {
537                 
538                 SeqMap::iterator itR;
539                 SeqMap::iterator itL;
540                 
541                 for (int i = 0; i < left.size(); i++) {
542                         
543                         SeqMap currentLeft = left[i];    //example i = 3;   currentLeft is a map of 0 to the distance of sequence 3 to sequence 0,
544                                                                                                 //                                                                              1 to the distance of sequence 3 to sequence 1,
545                                                                                                 //                                                                              2 to the distance of sequence 3 to sequence 2.
546                         SeqMap currentRight = right[i];         // same as left but with distances on the right side.
547                         
548                         for (int j = 0; j < i; j++) {
549                         
550                                 if (m->control_pressed) {  return 0; }
551                                 
552                                 itL = currentLeft.find(j);
553                                 itR = currentRight.find(j);
554 //cout << " i = " << i << " j = " << j << " distLeft = " << itL->second << endl;
555 //cout << " i = " << i << " j = " << j << " distright = " << itR->second << endl;
556                                 
557                                 //if you can find this entry update the preferences
558                                 if ((itL != currentLeft.end()) && (itR != currentRight.end())) {
559                                 
560                                         if (!correction) {
561                                                 pref[count][i].score += abs((itL->second - itR->second));
562                                                 pref[count][j].score += abs((itL->second - itR->second));
563 //cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
564 //cout << "abs = " << abs((itL->second - itR->second)) << endl;
565 //cout << i << " score = " << pref[i].score[1] << endl;
566 //cout << j << " score = " << pref[j].score[1] << endl;
567                                         }else {
568                                                 pref[count][i].score += abs((sqrt(itL->second) - sqrt(itR->second)));
569                                                 pref[count][j].score += abs((sqrt(itL->second) - sqrt(itR->second)));
570 //cout << "left " << i << " " << j << " = " << itL->second << " right " << i << " " << j << " = " << itR->second << endl;
571 //cout << "abs = " << abs((sqrt(itL->second) - sqrt(itR->second))) << endl;
572 //cout << i << " score = " << pref[i].score[1] << endl;
573 //cout << j << " score = " << pref[j].score[1] << endl;
574                                         }
575 //cout << "pref[" << i << "].closestLeft[1] = " <<      pref[i].closestLeft[1] << " parent = " << pref[i].leftParent[1] << endl;                        
576                                         //are you the closest left sequence
577                                         if (itL->second < pref[count][i].closestLeft) {  
578
579                                                 pref[count][i].closestLeft = itL->second;
580                                                 pref[count][i].leftParent = seqs[j]->getName();
581 //cout << "updating closest left to " << pref[i].leftParent[1] << endl;
582                                         }
583 //cout << "pref[" << j << "].closestLeft[1] = " <<      pref[j].closestLeft[1] << " parent = " << pref[j].leftParent[1] << endl;        
584                                         if (itL->second < pref[count][j].closestLeft) { 
585                                                 pref[count][j].closestLeft = itL->second;
586                                                 pref[count][j].leftParent = seqs[i]->getName();
587 //cout << "updating closest left to " << pref[j].leftParent[1] << endl;
588                                         }
589                                         
590                                         //are you the closest right sequence
591                                         if (itR->second < pref[count][i].closestRight) {   
592                                                 pref[count][i].closestRight = itR->second;
593                                                 pref[count][i].rightParent = seqs[j]->getName();
594                                         }
595                                         if (itR->second < pref[count][j].closestRight) {   
596                                                 pref[count][j].closestRight = itR->second;
597                                                 pref[count][j].rightParent = seqs[i]->getName();
598                                         }
599                                         
600                                 }
601                         }
602                 
603                 }
604                 
605                                 
606                 return 1;
607
608         }
609         catch(exception& e) {
610                 m->errorOut(e, "Bellerophon", "generatePreferences");
611                 exit(1);
612         }
613 }
614 /**************************************************************************************************/
615 vector<Preference> Bellerophon::getBestPref() {
616         try {
617                 
618                 vector<Preference> best;
619                 
620                 //for each sequence
621                 for (int i = 0; i < numSeqs; i++) {
622                         
623                         //set best pref score to first one
624                         Preference temp = pref[0][i];
625                         
626                         if (m->control_pressed) { return best;  }
627                         
628                         //for each window
629                         for (int j = 1; j < pref.size(); j++) {
630                                 
631                                 //is this a better score
632                                 if (pref[j][i].score > temp.score) {    temp = pref[j][i];              }
633                         }
634                         
635                         best.push_back(temp);
636                 }
637                 
638                 //rank preference score to eachother
639                 float dme = 0.0;
640                 float expectedPercent = 1 / (float) (best.size());
641                 
642                 for (int i = 0; i < best.size(); i++) {  dme += best[i].score;  }
643         
644                 for (int i = 0; i < best.size(); i++) {
645
646                         if (m->control_pressed) { return best; }
647                         
648                         //gives the actual percentage of the dme this seq adds
649                         best[i].score = best[i].score / dme;
650                         
651                         //how much higher or lower is this than expected
652                         best[i].score = best[i].score / expectedPercent;
653                 
654                 }
655                 
656                 //sort Preferences highest to lowest
657                 sort(best.begin(), best.end(), comparePref);
658
659                 return best;
660         }
661         catch(exception& e) {
662                 m->errorOut(e, "Bellerophon", "getBestPref");
663                 exit(1);
664         }
665 }
666 /**************************************************************************************************/
667 int Bellerophon::writePrefs(string file, linePair tempLine) {
668         try {
669         
670                 ofstream outTemp;
671                 m->openOutputFile(file, outTemp);
672                 
673                 //lets you know what part of the pref matrix you are writing
674                 outTemp << tempLine.start << '\t' << tempLine.num << endl;
675                 
676                 for (int i = tempLine.start; i < (tempLine.start + tempLine.num); i++) {
677                         
678                         for (int j = 0; j < numSeqs; j++) {
679                                 
680                                 if (m->control_pressed) { outTemp.close(); m->mothurRemove(file); return 0; }
681                                 
682                                 outTemp << pref[i][j].name << '\t' << pref[i][j].leftParent << '\t' << pref[i][j].rightParent << '\t';
683                                 outTemp << pref[i][j].score << '\t' << pref[i][j].closestLeft << '\t' << pref[i][j].closestRight << '\t' << pref[i][j].midpoint <<  endl;
684                         }
685                 }
686                 
687                 outTemp.close();
688                 
689                 return 0;
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "Bellerophon", "writePrefs");
693                 exit(1);
694         }
695 }
696 /**************************************************************************************************/
697 int Bellerophon::readPrefs(string file) {
698         try {
699         
700                 ifstream inTemp;
701                 m->openInputFile(file, inTemp);
702                 
703                 int start, num;
704                 
705                 //lets you know what part of the pref matrix you are writing
706                 inTemp >> start >> num;  m->gobble(inTemp);
707                 
708                 for (int i = start; i < num; i++) {
709                         
710                         for (int j = 0; j < numSeqs; j++) {
711                                 
712                                 if (m->control_pressed) { inTemp.close(); m->mothurRemove(file); return 0; }
713                         
714                                 inTemp >> pref[i][j].name >> pref[i][j].leftParent >> pref[i][j].rightParent;
715                                 inTemp >> pref[i][j].score >> pref[i][j].closestLeft >> pref[i][j].closestRight >> pref[i][j].midpoint;
716                                 m->gobble(inTemp);
717                         }
718                 }
719                 
720                 inTemp.close();
721                 
722                 m->mothurRemove(file);
723                 
724                 return 0;
725         }
726         catch(exception& e) {
727                 m->errorOut(e, "Bellerophon", "writePrefs");
728                 exit(1);
729         }
730 }
731 /**************************************************************************************************/
732 vector<string> Bellerophon::getBestWindow(linePair line) {
733         try {
734         
735                 vector<string> best;
736                         
737                 //for each sequence
738                 for (int i = 0; i < numSeqs; i++) {
739                         
740                         //set best pref score to first one
741                         Preference temp = pref[line.start][i];
742                         
743                         if (m->control_pressed) { return best;  }
744                         
745                         //for each window
746                         for (int j = (line.start+1); j < (line.start+line.num); j++) {
747                                 
748                                 //is this a better score
749                                 if (pref[j][i].score > temp.score) {    temp = pref[j][i];              }
750                         }
751                         
752                         string tempString = temp.name + '\t' + temp.leftParent + '\t' + temp.rightParent + '\t' + toString(temp.score);
753                         best.push_back(tempString);
754                 }
755
756                 return best;
757         
758         }
759         catch(exception& e) {
760                 m->errorOut(e, "Bellerophon", "getBestWindow");
761                 exit(1);
762         }
763 }
764 /**************************************************************************************************/
765 int Bellerophon::fillPref(int process, vector<string>& best) {
766         try {
767                 //figure out where you start so you can put the best scores there
768                 int numSeqsPerProcessor = iters / processors;
769                 int start = process * numSeqsPerProcessor;
770                 
771                 for (int i = 0; i < best.size(); i++) {
772                 
773                         if (m->control_pressed) { return 0;  }
774                         
775                         istringstream iss (best[i],istringstream::in);
776                         
777                         string tempScore;
778                         iss >> pref[start][i].name >> pref[start][i].leftParent >> pref[start][i].rightParent >> tempScore;
779                         convert(tempScore, pref[start][i].score); 
780                 }
781
782                 return 0;
783         }
784         catch(exception& e) {
785                 m->errorOut(e, "Bellerophon", "fillPref");
786                 exit(1);
787         }
788 }
789
790 /**************************************************************************************************/
791