]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
fixed bugs in venn and aligner
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "eachgapdist.h"
12
13 //***************************************************************************************************************
14
15 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
16 //***************************************************************************************************************
17
18 Pintail::~Pintail() {
19         try {
20                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
21                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
22         }
23         catch(exception& e) {
24                 errorOut(e, "Pintail", "~Pintail");
25                 exit(1);
26         }
27 }       
28 //***************************************************************************************************************
29 void Pintail::print(ostream& out) {
30         try {
31                 
32                 for (int i = 0; i < querySeqs.size(); i++) {
33                         
34                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << endl;
35                         out << "Observed\t";
36                         
37                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
38                         out << endl;
39                         
40                         out << "Expected\t";
41                         
42                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
43                         out << endl;
44                         
45                 }
46         }
47         catch(exception& e) {
48                 errorOut(e, "Pintail", "print");
49                 exit(1);
50         }
51 }
52
53 //***************************************************************************************************************
54 void Pintail::getChimeras() {
55         try {
56                 
57                 //read in query sequences and subject sequences
58                 mothurOut("Reading sequences and template file... "); cout.flush();
59                 querySeqs = readSeqs(fastafile);
60                 templateSeqs = readSeqs(templateFile);
61                 mothurOut("Done."); mothurOutEndLine();
62                 
63                 int numSeqs = querySeqs.size();
64                 
65                 obsDistance.resize(numSeqs);
66                 expectedDistance.resize(numSeqs);
67                 seqCoef.resize(numSeqs);
68                 DE.resize(numSeqs);
69                 Qav.resize(numSeqs);
70                 bestfit.resize(numSeqs);
71                 trim.resize(numSeqs);
72                 deviation.resize(numSeqs);
73                 windowSizes.resize(numSeqs, window);
74                 
75                 //break up file if needed
76                 int linesPerProcess = processors / numSeqs;
77                 
78                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
79                         //find breakup of sequences for all times we will Parallelize
80                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
81                         else {
82                                 //fill line pairs
83                                 for (int i = 0; i < (processors-1); i++) {                      
84                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
85                                 }
86                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
87                                 int i = processors - 1;
88                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
89                         }
90                 #else
91                         lines.push_back(new linePair(0, numSeqs));
92                 #endif
93                 
94                 distcalculator = new eachGapDist();
95                 
96                 if (processors == 1) { 
97                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
98                         bestfit = findPairs(lines[0]->start, lines[0]->end);
99 for (int m = 0; m < templateSeqs.size(); m++)  {
100         if (templateSeqs[m]->getName() == "198806") {  bestfit[0] = *(templateSeqs[m]); }
101         if (templateSeqs[m]->getName() == "198806") {  bestfit[1] = *(templateSeqs[m]); }
102         if (templateSeqs[m]->getName() == "108139") {  bestfit[2] = *(templateSeqs[m]); }
103 }
104                         
105 for (int j = 0; j < bestfit.size(); j++) {//cout << querySeqs[j]->getName() << '\t' << "length = " <<  querySeqs[j]->getAligned().length() << '\t' << bestfit[j].getName() << " length = " <<  bestfit[j].getAligned().length() <<  endl; 
106                                 //chops off beginning and end of sequences so they both start and end with a base
107                                 trimSeqs(querySeqs[j], bestfit[j], j);  
108 //cout << "NEW SEQ PAIR" << querySeqs[j]->getAligned() << endl << "IN THE MIDDLE" <<  endl << bestfit[j].getAligned() << endl; 
109
110 }
111
112                         mothurOut("Done."); mothurOutEndLine();
113
114                         windows = findWindows(lines[0]->start, lines[0]->end);
115                 } else {                createProcessesSpots();         }
116
117                 //find P
118                 if (consfile == "") {   probabilityProfile = calcFreq(templateSeqs);  }
119                 else                            {   probabilityProfile = readFreq();                      }
120                 
121                 //make P into Q
122                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];      }
123                 
124                 if (processors == 1) { 
125                                                 
126                         mothurOut("Calculating observed distance... "); cout.flush();
127                         obsDistance = calcObserved(lines[0]->start, lines[0]->end);
128                         mothurOut("Done."); mothurOutEndLine();
129                         
130                         mothurOut("Finding variability... "); cout.flush();
131                         Qav = findQav(lines[0]->start, lines[0]->end);
132 for (int i = 0; i < Qav.size(); i++) {
133 cout << querySeqs[i]->getName() << " = ";
134 for (int u = 0; u < Qav[i].size();u++) {   cout << Qav[i][u] << '\t';  }
135 cout << endl << endl;
136 }
137
138
139                         mothurOut("Done."); mothurOutEndLine();
140                         
141                         mothurOut("Calculating alpha... "); cout.flush();
142                         seqCoef = getCoef(lines[0]->start, lines[0]->end);
143 for (int i = 0; i < seqCoef.size(); i++) {
144 cout << querySeqs[i]->getName() << " coef = " << seqCoef[i] << endl;
145 }
146
147                         mothurOut("Done."); mothurOutEndLine();
148                         
149                         mothurOut("Calculating expected distance... "); cout.flush();
150                         expectedDistance = calcExpected(lines[0]->start, lines[0]->end);
151                         mothurOut("Done."); mothurOutEndLine();
152                         
153                         mothurOut("Finding deviation... "); cout.flush();
154                         DE = calcDE(lines[0]->start, lines[0]->end); 
155                         deviation = calcDist(lines[0]->start, lines[0]->end); 
156                         mothurOut("Done."); mothurOutEndLine();
157                         
158                         
159                         
160                 } 
161                 else {          createProcesses();              }
162                 
163                 delete distcalculator;
164         
165                 //free memory
166                 for (int i = 0; i < lines.size(); i++)                  {       delete lines[i];                }
167                         
168
169         }
170         catch(exception& e) {
171                 errorOut(e, "Pintail", "getChimeras");
172                 exit(1);
173         }
174 }
175
176 //***************************************************************************************************************
177
178 vector<Sequence*> Pintail::readSeqs(string file) {
179         try {
180         
181                 ifstream in;
182                 openInputFile(file, in);
183                 vector<Sequence*> container;
184                 
185                 //read in seqs and store in vector
186                 while(!in.eof()){
187                         Sequence* current = new Sequence(in);
188                         
189                         if (current->getAligned() == "") { current->setAligned(current->getUnaligned()); }
190                         //takes out stuff is needed
191                         current->setUnaligned(current->getUnaligned());
192                         
193                         container.push_back(current);
194                         
195                         gobble(in);
196                 }
197                 
198                 in.close();
199                 return container;
200                 
201         }
202         catch(exception& e) {
203                 errorOut(e, "Pintail", "readSeqs");
204                 exit(1);
205         }
206 }
207
208 //***************************************************************************************************************
209 //num is query's spot in querySeqs
210 void Pintail::trimSeqs(Sequence* query, Sequence& subject, int num) {
211         try {
212         
213                 string q = query->getAligned();
214                 string s = subject.getAligned();
215         
216                 int front = 0;
217                 for (int i = 0; i < q.length(); i++) {
218                         if (isalpha(q[i]) && isalpha(s[i])) { front = i; break;  }
219                 }
220                 
221                 q = q.substr(front, q.length());
222                 s = s.substr(front, s.length());
223                 
224                 int back = 0;           
225                 for (int i = q.length(); i >= 0; i--) {
226                         if (isalpha(q[i]) && isalpha(s[i])) { back = i; break;  }
227                 }
228         
229                 q = q.substr(0, back);
230                 s = s.substr(0, back);
231
232                 trim[num][front] = back;
233         
234                 //save 
235                 query->setAligned(q);
236                 query->setUnaligned(q);
237                 subject.setAligned(s);
238                 subject.setUnaligned(s);
239         }
240         catch(exception& e) {
241                 errorOut(e, "Pintail", "trimSeqs");
242                 exit(1);
243         }
244 }
245
246 //***************************************************************************************************************
247
248 vector<float> Pintail::readFreq() {
249         try {
250         
251                 ifstream in;
252                 openInputFile(consfile, in);
253                 
254                 vector<float> prob;
255                 
256                 //read in probabilities and store in vector
257                 int pos; float num;
258                 
259                 while(!in.eof()){
260                         
261                         in >> pos >> num;
262                         
263                         prob.push_back(num);
264                         
265                         gobble(in);
266                 }
267                 
268                 in.close();
269                 return prob;
270                 
271         }
272         catch(exception& e) {
273                 errorOut(e, "Pintail", "readFreq");
274                 exit(1);
275         }
276 }
277
278 //***************************************************************************************************************
279 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
280 vector<Sequence> Pintail::findPairs(int start, int end) {
281         try {
282                 
283                 vector<Sequence> seqsMatches;  seqsMatches.resize(end-start);
284                 
285                 for(int i = start; i < end; i++){
286                 
287                         float smallest = 10000.0;
288                         Sequence query = *(querySeqs[i]);
289                 
290                         for(int j = 0; j < templateSeqs.size(); j++){
291                                 
292                                 Sequence temp = *(templateSeqs[j]);
293                                 
294                                 distcalculator->calcDist(query, temp);
295                                 float dist = distcalculator->getDist();
296                                 
297                                 if (dist < smallest) { 
298                                         seqsMatches[i] = *(templateSeqs[j]);
299                                         smallest = dist;
300                                 }
301                         }
302                 }
303                 
304                 return seqsMatches;
305         
306         }
307         catch(exception& e) {
308                 errorOut(e, "Pintail", "findPairs");
309                 exit(1);
310         }
311 }
312
313 //***************************************************************************************************************
314 //find the window breaks for each sequence
315 vector< vector<int> > Pintail::findWindows(int start, int end) {
316         try {
317                 
318                 vector< vector<int> > win;  win.resize(end-start);
319                 
320                 //for each sequence
321                 int count = 0;
322                 for(int i = start; i < end; i++){
323                         
324                         //if window is set to default
325                         if (windowSizes[i] == 0) {  if (querySeqs[i]->getAligned().length() > 1200) {  windowSizes[i] = 300; }
326                                                         else{  windowSizes[i] = (querySeqs[i]->getAligned().length() / 4); }  } 
327                         else if (windowSizes[i] > (querySeqs[i]->getAligned().length() / 4)) { 
328                                         mothurOut("You have selected to large a window size for sequence " + querySeqs[i]->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
329                                         windowSizes[i] = (querySeqs[i]->getAligned().length() / 4); 
330                         }
331         
332         //cout << "length = " << querySeqs[i]->getAligned().length() << " window = " << windowSizes[i] << " increment = " << increment << endl;                 
333                                 
334
335                         string seq = querySeqs[i]->getAligned();
336                         int numBases = querySeqs[i]->getUnaligned().length();
337                         int spot = 0;
338                         
339                         //find location of first base
340                         for (int j = 0; j < seq.length(); j++) {
341                                 if (isalpha(seq[j])) { spot = j;  break;  }
342                         }
343                         
344                         //save start of seq
345                         win[count].push_back(spot);
346                         
347                         
348                         //move ahead increment bases at a time until all bases are in a window
349                         int countBases = 0;
350                         int totalBases = 0;  //used to eliminate window of blanks at end of sequence
351                         for (int m = spot; m < seq.length(); m++) {
352                                 
353                                 //count number of bases you see
354                                 if (isalpha(seq[m])) { countBases++; totalBases++;  }
355                                 
356                                 //if you have seen enough bases to make a new window
357                                 if (countBases >= increment) {
358                                         win[count].push_back(m);  //save spot in alignment
359                                         countBases = 0;                         //reset bases you've seen in this window
360                                 }
361                                 
362                                 //no need to continue if all your bases are in a window
363                                 if (totalBases == numBases) {   break;  }
364                         }
365                         
366                         count++;
367                 }
368                 
369                 
370                 
371                 return win;
372         
373         }
374         catch(exception& e) {
375                 errorOut(e, "Pintail", "findWindows");
376                 exit(1);
377         }
378 }
379
380 //***************************************************************************************************************
381 vector< vector<float> > Pintail::calcObserved(int start, int end) {
382         try {
383                 
384                 vector< vector<float> > temp;
385                 temp.resize(end-start);
386                 
387                 int count = 0;
388                 for(int i = start; i < end; i++){
389                 
390                         Sequence* query = querySeqs[i];
391                         Sequence subject = bestfit[i];
392                 
393                         int startpoint = 0; 
394                         for (int m = 0; m < windows[i].size(); m++) {
395
396                                 string seqFrag = query->getAligned().substr(windows[i][startpoint], windowSizes[i]);
397                                 string seqFragsub = subject.getAligned().substr(windows[i][startpoint], windowSizes[i]);
398                                                                 
399                                 int diff = 0;
400                 for (int b = 0; b < seqFrag.length(); b++) {
401                   
402                     //if either the query or subject is not a gap 
403                     if ((isalpha(seqFrag[b])) || (isalpha(seqFragsub[b]))) {
404                         //and they are different - penalize
405                         if (seqFrag[b] != seqFragsub[b]) { diff++; }
406                     }
407                 }
408                
409                 //percentage of mismatched bases
410                                 float dist;
411                 dist = diff / (float) seqFrag.length() * 100;       
412                                 
413                                 temp[count].push_back(dist);
414                                 
415                                 startpoint++;
416                         }
417                         
418                         count++;
419                 }
420                 
421                 return temp;
422         }
423         catch(exception& e) {
424                 errorOut(e, "Pintail", "calcObserved");
425                 exit(1);
426         }
427 }
428 //***************************************************************************************************************
429 vector<float>  Pintail::calcDist(int start, int end) {
430         try {
431                 
432                 vector<float> temp;
433                                 
434                 for(int i = start; i < end; i++){
435                 
436                         Sequence* query = querySeqs[i];
437                         Sequence subject = bestfit[i];
438                         
439                         string seqFrag = query->getAligned();
440                         string seqFragsub = subject.getAligned();
441                                                                                                                 
442                         int diff = 0;
443                         for (int b = 0; b < seqFrag.length(); b++) {
444                   
445                                 //if either the query or subject is not a gap 
446                                 if ((isalpha(seqFrag[b])) || (isalpha(seqFragsub[b]))) {
447                                         //and they are different - penalize
448                                         if (seqFrag[b] != seqFragsub[b]) { diff++; }
449                                 }
450                         }
451                
452                         //percentage of mismatched bases
453                         float dist;
454                         dist = diff / (float) seqFrag.length() * 100;       
455                                 
456                         temp.push_back(dist);
457                 }
458                 
459                 return temp;
460         }
461         catch(exception& e) {
462                 errorOut(e, "Pintail", "calcDist");
463                 exit(1);
464         }
465 }
466
467 //***************************************************************************************************************
468 vector< vector<float> > Pintail::calcExpected(int start, int end) {
469         try {
470                 
471                 vector< vector<float> > temp; temp.resize(end-start);
472                 
473                 //for each sequence
474                 int count = 0;
475                 for(int i = start; i < end; i++){
476                         
477                         float coef = seqCoef[i];
478                         
479                         //for each window
480                         vector<float> queryExpected;
481                         for (int m = 0; m < windows[i].size(); m++) {           
482                                 float expected = Qav[i][m] * coef;
483                                 queryExpected.push_back(expected);      
484 //cout << "average variabilty over window = " << averageProbability[m] << " coef = " << coef << " ei = "  << expected << '\t' <<  "window = " << m << endl;
485                         }
486                         
487                         temp[count] = queryExpected;
488                         
489                         count++;
490                         
491                 }
492                 
493                 return temp;
494                                 
495         }
496         catch(exception& e) {
497                 errorOut(e, "Pintail", "calcExpected");
498                 exit(1);
499         }
500 }
501 //***************************************************************************************************************
502 vector<float> Pintail::calcDE(int start, int end) {
503         try {
504                 
505                 vector<float> temp; temp.resize(end-start);
506         
507                 //for each sequence
508                 int count = 0;
509                 for(int i = start; i < end; i++){
510                         
511                         vector<float> obs = obsDistance[i];
512                         vector<float> exp = expectedDistance[i];
513                         
514 //      cout << "difference between obs and exp = " << abs(obs[m] - exp[m]) << endl;    
515                         //for each window
516                         float sum = 0.0;  //sum = sum from 1 to m of (oi-ei)^2
517                         for (int m = 0; m < windows[i].size(); m++) {           sum += ((obs[m] - exp[m]) * (obs[m] - exp[m]));         }
518                         
519                         float de = sqrt((sum / (windows[i].size() - 1)));
520                         
521                         temp[count] = de;
522                         count++;
523                 }
524                 
525                 return temp;
526         }
527         catch(exception& e) {
528                 errorOut(e, "Pintail", "calcDE");
529                 exit(1);
530         }
531 }
532
533 //***************************************************************************************************************
534
535 vector<float> Pintail::calcFreq(vector<Sequence*> seqs) {
536         try {
537
538                 vector<float> prob;
539                 string freqfile = getRootName(templateFile) + "probability";
540                 ofstream outFreq;
541                 
542                 openOutputFile(freqfile, outFreq);
543                 
544                 //at each position in the sequence
545                 for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
546                 
547                         vector<int> freq;   freq.resize(4,0);
548                         int gaps = 0;
549                         
550                         //find the frequency of each nucleotide
551                         for (int j = 0; j < seqs.size(); j++) {
552                                 
553                                 char value = seqs[j]->getAligned()[i];
554                         
555                                 if(toupper(value) == 'A')                                                                       {       freq[0]++;      }
556                                 else if(toupper(value) == 'T' || toupper(value) == 'U')         {       freq[1]++;      }
557                                 else if(toupper(value) == 'G')                                                          {       freq[2]++;      }
558                                 else if(toupper(value) == 'C')                                                          {       freq[3]++;      }
559                                 else { gaps++; }
560                         }
561                         
562                         //find base with highest frequency
563                         int highest = 0;
564                         for (int m = 0; m < freq.size(); m++) {   if (freq[m] > highest) {  highest = freq[m];  }               }
565                         
566                         float highFreq;
567                         //if ( (seqs.size() - gaps) == 0 ) {  highFreq = 1.0;  }                        
568                         //else { highFreq = highest / (float) (seqs.size() - gaps);      }
569                         highFreq = highest / (float) seqs.size();
570 cout << i << '\t' << highFreq << endl;
571                         
572                         float Pi;
573                         Pi =  (highFreq - 0.25) / 0.75; 
574                         
575                         //saves this for later
576                         outFreq << i << '\t' << Pi << endl;
577                                 
578                         prob.push_back(Pi); 
579                 }
580                 
581                 outFreq.close();
582                 
583                 return prob;
584                                 
585         }
586         catch(exception& e) {
587                 errorOut(e, "Pintail", "calcFreq");
588                 exit(1);
589         }
590 }
591 //***************************************************************************************************************
592 vector< vector<float> > Pintail::findQav(int start, int end) {
593         try {
594                 vector< vector<float> > averages; 
595                 map<int, int>::iterator it;
596                 
597                 for(int i = start; i < end; i++){
598                 
599                         //for each window find average
600                         vector<float> temp;
601                         for (int m = 0; m < windows[i].size(); m++) {
602                                 
603                                 float average = 0.0;
604                                 
605                                 it = trim[i].begin();  //trim[i] is a map of where this sequence was trimmed
606                                 
607                                 //while you are in the window for this sequence
608                                 for (int j = windows[i][m]+it->first; j < (windows[i][m]+windowSizes[i]); j++) {   average += probabilityProfile[j];    }
609                                 
610                                 average = average / windowSizes[i];
611         //cout << average << endl;                      
612                                 //save this windows average
613                                 temp.push_back(average);
614                         }
615                         
616                         //save this qav
617                         averages.push_back(temp);
618                 }
619                 
620                 return averages;
621         }
622         catch(exception& e) {
623                 errorOut(e, "Pintail", "findQav");
624                 exit(1);
625         }
626 }
627 //***************************************************************************************************************
628 vector<float> Pintail::getCoef(int start, int end) {
629         try {
630                 vector<float> coefs;
631                 coefs.resize(end-start);
632                 
633                 //find a coef for each sequence
634                 int count = 0;
635                 for(int i = start; i < end; i++){
636                 
637                         //find average prob for this seqs windows
638                         float probAverage = 0.0;
639                         for (int j = 0; j < Qav[i].size(); j++) {   probAverage += Qav[i][j];   }
640                         probAverage = probAverage / (float) Qav[i].size();
641         cout << "(sum of ai) / m = " << probAverage << endl;            
642
643                         vector<float> temp = obsDistance[i];
644                         
645                         //find observed average 
646                         float obsAverage = 0.0;
647                         for (int j = 0; j < temp.size(); j++) {   obsAverage += temp[j];        }
648                         obsAverage = obsAverage / (float) temp.size();
649 cout << "(sum of oi) / m = " << obsAverage << endl;             
650                         float coef = obsAverage / probAverage;
651                 
652                         //save this sequences coefficient
653                         coefs[count] = coef;
654                         
655                         count++;
656                 }
657                 
658                                                 
659                 return coefs;
660         }
661         catch(exception& e) {
662                 errorOut(e, "Pintail", "getCoef");
663                 exit(1);
664         }
665 }
666
667
668 /**************************************************************************************************/
669
670 void Pintail::createProcessesSpots() {
671         try {
672 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
673                 int process = 0;
674                 vector<int> processIDS;
675                 vector< vector<int> > win; win.resize(querySeqs.size());
676                 vector< map <int, int> > t; t.resize(querySeqs.size());
677                 
678                 //loop through and create all the processes you want
679                 while (process != processors) {
680                         int pid = fork();
681                         
682                         if (pid > 0) {
683                                 processIDS.push_back(pid);  
684                                 process++;
685                         }else if (pid == 0){
686                                 
687                                 vector<Sequence> tempbest;
688                                 tempbest = findPairs(lines[process]->start, lines[process]->end);
689                                 int count = 0;
690                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
691                                         bestfit[i] = tempbest[count];
692                                         
693                                         //chops off beginning and end of sequences so they both start and end with a base
694                                         trimSeqs(querySeqs[i], bestfit[i], i);
695                                         t[i] = trim[i];
696                                         
697                                         count++;
698                                 }
699                                 
700                                 
701                                 
702                                 vector< vector<int> > temp = findWindows(lines[process]->start, lines[process]->end);
703                                 
704                                 //move into best
705                                 count = 0;
706                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
707                                         win[i] = temp[count];
708                                         count++;
709                                 }
710                                 
711                                 exit(0);
712                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
713                 }
714                 
715                 //force parent to wait until all the processes are done
716                 for (int i=0;i<processors;i++) { 
717                         int temp = processIDS[i];
718                         wait(&temp);
719                 }
720                 
721                 windows = win;
722                 trim = t;
723 #else
724                 windows = findWindows(lines[0]->start, lines[0]->end);
725
726 #endif          
727         }
728         catch(exception& e) {
729                 errorOut(e, "Pintail", "createProcessesSpots");
730                 exit(1);
731         }
732 }
733
734
735 /**************************************************************************************************/
736
737 void Pintail::createProcesses() {
738         try {
739 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
740                 int process = 0;
741                 vector<int> processIDS;
742                 
743                 vector< vector<float> > exp;  exp.resize(querySeqs.size());
744                 vector<float> de; de.resize(querySeqs.size());
745                 vector< vector<float> > obs; obs.resize(querySeqs.size());
746                 
747                 
748                 //loop through and create all the processes you want
749                 while (process != processors) {
750                         int pid = fork();
751                         
752                         if (pid > 0) {
753                                 processIDS.push_back(pid);  
754                                 process++;
755                         }else if (pid == 0){
756                                 
757                                 vector< vector<float> > temp;
758                                 vector<float> tempde;
759                                 int count = 0;
760                                 
761                                 
762                                 temp = calcObserved(lines[process]->start, lines[process]->end);
763                                 count = 0;
764                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
765                                         obs[i] = temp[count];
766                                         count++;
767                                 }
768
769                                 temp = findQav(lines[process]->start, lines[process]->end);
770                                 count = 0;
771                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
772                                         Qav[i] = temp[count];
773                                         count++;
774                                 }
775                                 
776                                 tempde = getCoef(lines[process]->start, lines[process]->end);
777                                 count = 0;
778                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
779                                         seqCoef[i] = tempde[count];
780                                         count++;
781                                 }
782                                 
783                                 temp = calcExpected(lines[process]->start, lines[process]->end);
784                                 count = 0;
785                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
786                                         exp[i] = temp[count];
787                                         count++;
788                                 }
789
790                                 
791                                 tempde = calcDE(lines[process]->start, lines[process]->end); 
792                                 count = 0;
793                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
794                                         de[i] = tempde[count];
795                                         count++;
796                                 }
797
798                                 exit(0);
799                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
800                 }
801                 
802                 //force parent to wait until all the processes are done
803                 for (int i=0;i<processors;i++) { 
804                         int temp = processIDS[i];
805                         wait(&temp);
806                 }
807                 
808                 obsDistance = obs;
809                 expectedDistance = exp;
810                 DE = de;
811                 
812 #else
813                 bestfit = findPairs(lines[0]->start, lines[0]->end);
814                 obsDistance = calcObserved(lines[0]->start, lines[0]->end);
815                 Qav = findQav(lines[0]->start, lines[0]->end);
816                 seqCoef = getCoef(lines[0]->start, lines[0]->end);
817                 expectedDistance = calcExpected(lines[0]->start, lines[0]->end);
818                 DE = calcDE(lines[0]->start, lines[0]->end); 
819
820 #endif          
821         }
822         catch(exception& e) {
823                 errorOut(e, "Pintail", "createProcesses");
824                 exit(1);
825         }
826 }
827
828 //***************************************************************************************************************
829
830