]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
changed sequence class so that when constructor is called aligned and unaligned value...
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12
13 //***************************************************************************************************************
14
15 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
16 //***************************************************************************************************************
17
18 Pintail::~Pintail() {
19         try {
20                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
21                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
22         }
23         catch(exception& e) {
24                 errorOut(e, "Pintail", "~Pintail");
25                 exit(1);
26         }
27 }       
28 //***************************************************************************************************************
29 void Pintail::print(ostream& out) {
30         try {
31                 
32                 for (int i = 0; i < querySeqs.size(); i++) {
33                         
34                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << endl;
35                         out << "Observed\t";
36                         
37                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
38                         out << endl;
39                         
40                         out << "Expected\t";
41                         
42                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
43                         out << endl;
44                         
45                 }
46         }
47         catch(exception& e) {
48                 errorOut(e, "Pintail", "print");
49                 exit(1);
50         }
51 }
52
53 //***************************************************************************************************************
54 void Pintail::getChimeras() {
55         try {
56                 
57                 //read in query sequences and subject sequences
58                 mothurOut("Reading sequences and template file... "); cout.flush();
59                 querySeqs = readSeqs(fastafile);
60                 templateSeqs = readSeqs(templateFile);
61                 mothurOut("Done."); mothurOutEndLine();
62                 
63                 int numSeqs = querySeqs.size();
64                 
65                 obsDistance.resize(numSeqs);
66                 expectedDistance.resize(numSeqs);
67                 seqCoef.resize(numSeqs);
68                 DE.resize(numSeqs);
69                 Qav.resize(numSeqs);
70                 bestfit.resize(numSeqs);
71                 deviation.resize(numSeqs);
72                 trimmed.resize(numSeqs);
73                 windowSizes.resize(numSeqs, window);
74                 windows.resize(numSeqs);
75                 
76                 //break up file if needed
77                 int linesPerProcess = processors / numSeqs;
78                 
79                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
80                         //find breakup of sequences for all times we will Parallelize
81                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
82                         else {
83                                 //fill line pairs
84                                 for (int i = 0; i < (processors-1); i++) {                      
85                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
86                                 }
87                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
88                                 int i = processors - 1;
89                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
90                         }
91                 #else
92                         lines.push_back(new linePair(0, numSeqs));
93                 #endif
94                 
95                 distcalculator = new ignoreGaps();
96
97                                 
98                 if (processors == 1) { 
99                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
100                         bestfit = findPairs(lines[0]->start, lines[0]->end);
101 /*for (int m = 0; m < templateSeqs.size(); m++)  {
102         if (templateSeqs[m]->getName() == "159481") {  bestfit[17] = *(templateSeqs[m]); }
103         if (templateSeqs[m]->getName() == "100137") {  bestfit[16] = *(templateSeqs[m]); }
104         if (templateSeqs[m]->getName() == "112956") {  bestfit[15] = *(templateSeqs[m]); }
105         if (templateSeqs[m]->getName() == "102326") {  bestfit[14] = *(templateSeqs[m]); }
106         if (templateSeqs[m]->getName() == "66229") {  bestfit[13] = *(templateSeqs[m]); }
107         if (templateSeqs[m]->getName() == "206276") {  bestfit[12] = *(templateSeqs[m]); }
108     if (templateSeqs[m]->getName() == "63607") {  bestfit[11] = *(templateSeqs[m]); }
109         if (templateSeqs[m]->getName() == "7056") {  bestfit[10] = *(templateSeqs[m]); }
110         if (templateSeqs[m]->getName() == "7088") {  bestfit[9] = *(templateSeqs[m]); }
111         if (templateSeqs[m]->getName() == "17553") {  bestfit[8] = *(templateSeqs[m]); }
112         if (templateSeqs[m]->getName() == "131723") {  bestfit[7] = *(templateSeqs[m]); }
113         if (templateSeqs[m]->getName() == "69013") {  bestfit[6] = *(templateSeqs[m]); }
114         if (templateSeqs[m]->getName() == "24543") {  bestfit[5] = *(templateSeqs[m]); }
115         if (templateSeqs[m]->getName() == "27824") {  bestfit[4] = *(templateSeqs[m]); }
116         if (templateSeqs[m]->getName() == "1456") {  bestfit[3] = *(templateSeqs[m]); }
117         if (templateSeqs[m]->getName() == "1456") {  bestfit[2] = *(templateSeqs[m]); }
118         if (templateSeqs[m]->getName() == "141312") {  bestfit[1] = *(templateSeqs[m]); }
119         if (templateSeqs[m]->getName() == "141312") {  bestfit[0] = *(templateSeqs[m]); }
120
121
122 }*/
123                         
124                         for (int j = 0; j < bestfit.size(); j++) { 
125                                 //chops off beginning and end of sequences so they both start and end with a base
126                                 trimSeqs(querySeqs[j], bestfit[j], j);  
127                         }
128                         mothurOut("Done."); mothurOutEndLine();
129                         
130                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
131                                 it = trimmed[i].begin();
132                                 vector<int> win = findWindows(querySeqs[i], it->first, it->second, windowSizes[i]);
133                                 windows[i] = win;
134                         }
135
136                         
137                 } else {                createProcessesSpots();         }
138
139                 //find P
140                 if (consfile == "") {   probabilityProfile = calcFreq(templateSeqs);  }
141                 else                            {   probabilityProfile = readFreq();                      }
142                 
143                 //make P into Q
144                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];      }
145                 
146                 if (processors == 1) { 
147                                                 
148                         mothurOut("Calculating observed distance... "); cout.flush();
149                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
150                                 vector<float> obsi = calcObserved(querySeqs[i], bestfit[i], windows[i], windowSizes[i]);
151                                 obsDistance[i] = obsi;
152                         }
153                         mothurOut("Done."); mothurOutEndLine();
154                         
155                         
156                         
157                         mothurOut("Finding variability... "); cout.flush();
158                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
159                                 vector<float> q = findQav(windows[i], windowSizes[i]);
160                                 Qav[i] = q;
161                         }
162                         mothurOut("Done."); mothurOutEndLine();
163                         
164                         
165                         
166                         mothurOut("Calculating alpha... "); cout.flush();
167                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
168                                 float alpha = getCoef(obsDistance[i], Qav[i]);
169                                 seqCoef.push_back(alpha);
170                         }
171                         mothurOut("Done."); mothurOutEndLine();
172                 
173                 
174                 
175                         mothurOut("Calculating expected distance... "); cout.flush();
176                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
177                                 vector<float> exp = calcExpected(Qav[i], seqCoef[i]);
178                                 expectedDistance[i] = exp;
179                         }
180                         mothurOut("Done."); mothurOutEndLine();
181                         
182                         
183                         
184                         mothurOut("Finding deviation... "); cout.flush();
185                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
186                                 float de = calcDE(obsDistance[i], expectedDistance[i]);
187                                 DE[i] = de;
188                                 
189                                 it = trimmed[i].begin();
190                                 float dist = calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
191                                 deviation[i] = dist;
192                         }
193                         mothurOut("Done."); mothurOutEndLine();
194                         
195                 } 
196                 else {          createProcesses();              }
197                 
198                 delete distcalculator;
199                 
200                 //quantiles are used to determine whether the de values found indicate a chimera
201                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
202                 //combination of sequences in the template
203                 if (quanfile != "") {   readQuantiles();  }
204                 else {
205                         if (processors == 1) { 
206                                 quantiles = getQuantiles(lines[0]->start, lines[0]->end);
207                         }else {         createProcessesQuan();          }
208                 }
209
210         
211                 //free memory
212                 for (int i = 0; i < lines.size(); i++)                  {       delete lines[i];                }
213                         
214
215         }
216         catch(exception& e) {
217                 errorOut(e, "Pintail", "getChimeras");
218                 exit(1);
219         }
220 }
221
222 //***************************************************************************************************************
223
224 vector<Sequence*> Pintail::readSeqs(string file) {
225         try {
226         
227                 ifstream in;
228                 openInputFile(file, in);
229                 vector<Sequence*> container;
230                 
231                 //read in seqs and store in vector
232                 while(!in.eof()){
233                         Sequence* current = new Sequence(in);
234                         
235                         //if (current->getAligned() == "") { current->setAligned(current->getUnaligned()); }
236                         //takes out stuff is needed
237                         //current->setUnaligned(current->getUnaligned());
238                         
239                         container.push_back(current);
240                         
241                         gobble(in);
242                 }
243                 
244                 in.close();
245                 return container;
246                 
247         }
248         catch(exception& e) {
249                 errorOut(e, "Pintail", "readSeqs");
250                 exit(1);
251         }
252 }
253
254 //***************************************************************************************************************
255 //num is query's spot in querySeqs
256 map<int, int> Pintail::trimSeqs(Sequence* query, Sequence subject, int num) {
257         try {
258                 
259                 string q = query->getAligned();
260                 string s = subject.getAligned();
261                 
262                 int front = 0;
263                 for (int i = 0; i < q.length(); i++) {
264                         if (isalpha(q[i]) && isalpha(s[i])) { front = i; break;  }
265                 }
266                 
267                 int back = 0;           
268                 for (int i = q.length(); i >= 0; i--) {
269                         if (isalpha(q[i]) && isalpha(s[i])) { back = i; break;  }
270                 }
271                 
272                 //if num = -1 then you are calling this from quantiles
273                 if (num != -1) { 
274                         trimmed[num][front] = back; 
275                         return trimmed[num];
276                 }
277                 
278                 map<int, int> temp;
279                 temp[front] = back;
280                 return temp;
281                 
282         }
283         catch(exception& e) {
284                 errorOut(e, "Pintail", "trimSeqs");
285                 exit(1);
286         }
287 }
288
289 //***************************************************************************************************************
290
291 vector<float> Pintail::readFreq() {
292         try {
293         
294                 ifstream in;
295                 openInputFile(consfile, in);
296                 
297                 vector<float> prob;
298                 
299                 //read in probabilities and store in vector
300                 int pos; float num; 
301                 
302                 while(!in.eof()){
303                         
304                         in >> pos >> num;
305                         
306                         //do you want this spot
307                         prob.push_back(num);  
308                         
309                         gobble(in);
310                 }
311                 
312                 in.close();
313                 return prob;
314                 
315         }
316         catch(exception& e) {
317                 errorOut(e, "Pintail", "readFreq");
318                 exit(1);
319         }
320 }
321
322 //***************************************************************************************************************
323
324 vector< vector<float> > Pintail::readQuantiles() {
325         try {
326         
327                 ifstream in;
328                 openInputFile(quanfile, in);
329                 
330                 vector< vector<float> > quan;
331                 
332                 //read in probabilities and store in vector
333                 int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
334                 
335                 while(!in.eof()){
336                         
337                         in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
338                         
339                         vector <float> temp;
340                         
341                         temp.push_back(ten); 
342                         temp.push_back(twentyfive);
343                         temp.push_back(fifty);
344                         temp.push_back(seventyfive);
345                         temp.push_back(ninetyfive);
346                         temp.push_back(ninetynine);
347                         
348                         //do you want this spot
349                         quan.push_back(temp);  
350                         
351                         gobble(in);
352                 }
353                 
354                 in.close();
355                 return quan;
356                 
357         }
358         catch(exception& e) {
359                 errorOut(e, "Pintail", "readQuantiles");
360                 exit(1);
361         }
362 }
363
364
365 //***************************************************************************************************************
366 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
367 vector<Sequence> Pintail::findPairs(int start, int end) {
368         try {
369                 
370                 vector<Sequence> seqsMatches;  seqsMatches.resize(end-start);
371                 
372                 for(int i = start; i < end; i++){
373                 
374                         float smallest = 10000.0;
375                         Sequence query = *(querySeqs[i]);
376                 
377                         for(int j = 0; j < templateSeqs.size(); j++){
378                                 
379                                 Sequence temp = *(templateSeqs[j]);
380                                 
381                                 distcalculator->calcDist(query, temp);
382                                 float dist = distcalculator->getDist();
383                                 
384                                 if (dist < smallest) { 
385                                         seqsMatches[i] = *(templateSeqs[j]);
386                                         smallest = dist;
387                                 }
388                         }
389                 }
390                 
391                 return seqsMatches;
392         
393         }
394         catch(exception& e) {
395                 errorOut(e, "Pintail", "findPairs");
396                 exit(1);
397         }
398 }
399
400 //***************************************************************************************************************
401 //find the window breaks for each sequence - this is so you can move ahead by bases.
402 vector<int>  Pintail::findWindows(Sequence* query, int front, int back, int& size) {
403         try {
404                 
405                 vector<int> win; 
406                 
407                 int cutoff = back - front;  //back - front 
408                         
409                 //if window is set to default
410                 if (size == 0) {  if (cutoff > 1200) {  size = 300; }
411                                                         else{  size = (cutoff / 4); }  } 
412                 else if (size > (cutoff / 4)) { 
413                                 mothurOut("You have selected to large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
414                                 size = (cutoff / 4); 
415                 }
416         
417                 string seq = query->getAligned().substr(front, cutoff);
418                         
419                 //count bases
420                 int numBases = 0;
421                 for (int l = 0; l < seq.length(); l++) {  if (isalpha(seq[l])) { numBases++; }  }
422                         
423                 //save start of seq
424                 win.push_back(front);
425                 
426                 //move ahead increment bases at a time until all bases are in a window
427                 int countBases = 0;
428                 int totalBases = 0;  //used to eliminate window of blanks at end of sequence
429                         
430                 seq = query->getAligned();
431                 for (int m = front; m < (back - size) ; m++) {
432                                 
433                         //count number of bases you see
434                         if (isalpha(seq[m])) { countBases++; totalBases++;  }
435                                 
436                         //if you have seen enough bases to make a new window
437                         if (countBases >= increment) {
438                                 win.push_back(m);  //save spot in alignment
439                                 countBases = 0;                         //reset bases you've seen in this window
440                         }
441                                 
442                         //no need to continue if all your bases are in a window
443                         if (totalBases == numBases) {   break;  }
444                 }
445                         
446                 return win;
447         
448         }
449         catch(exception& e) {
450                 errorOut(e, "Pintail", "findWindows");
451                 exit(1);
452         }
453 }
454
455 //***************************************************************************************************************
456 vector<float> Pintail::calcObserved(Sequence* query, Sequence subject, vector<int> window, int size) {
457         try {
458                 
459                 vector<float> temp;
460                                 
461                 int startpoint = 0; 
462                 for (int m = 0; m < windows.size(); m++) {
463                                                 
464                         string seqFrag = query->getAligned().substr(window[startpoint], size);
465                         string seqFragsub = subject.getAligned().substr(window[startpoint], size);
466                                                                 
467                         int diff = 0;
468                         for (int b = 0; b < seqFrag.length(); b++) {
469                                 if (seqFrag[b] != seqFragsub[b]) { diff++; }
470                         }
471                
472                         //percentage of mismatched bases
473                         float dist;
474                         dist = diff / (float) seqFrag.length() * 100;       
475                                 
476                         temp.push_back(dist);
477                                 
478                         startpoint++;
479                 }
480                         
481                 return temp;
482         }
483         catch(exception& e) {
484                 errorOut(e, "Pintail", "calcObserved");
485                 exit(1);
486         }
487 }
488 //***************************************************************************************************************
489 float Pintail::calcDist(Sequence* query, Sequence subject, int front, int back) {
490         try {
491                 
492                 //so you only look at the trimmed part of the sequence
493                 int cutoff = back - front;  
494                         
495                 //from first startpoint with length back-front
496                 string seqFrag = query->getAligned().substr(front, cutoff);
497                 string seqFragsub = subject.getAligned().substr(front, cutoff);
498                                                                                                                 
499                 int diff = 0;
500                 for (int b = 0; b < seqFrag.length(); b++) {
501                         if (seqFrag[b] != seqFragsub[b]) { diff++; }
502                 }
503                
504                 //percentage of mismatched bases
505                 float dist = diff / (float) seqFrag.length() * 100;       
506                                 
507                 return dist;
508         }
509         catch(exception& e) {
510                 errorOut(e, "Pintail", "calcDist");
511                 exit(1);
512         }
513 }
514
515 //***************************************************************************************************************
516 vector<float> Pintail::calcExpected(vector<float> qav, float coef) {
517         try {
518                 
519                 //for each window
520                 vector<float> queryExpected;
521                         
522                 for (int m = 0; m < qav.size(); m++) {          
523                                 
524                         float expected = qav[m] * coef;
525                                 
526                         queryExpected.push_back(expected);      
527                 }
528                         
529                 return queryExpected;
530                                 
531         }
532         catch(exception& e) {
533                 errorOut(e, "Pintail", "calcExpected");
534                 exit(1);
535         }
536 }
537 //***************************************************************************************************************
538 float Pintail::calcDE(vector<float> obs, vector<float> exp) {
539         try {
540                 
541                 //for each window
542                 float sum = 0.0;  //sum = sum from 1 to m of (oi-ei)^2
543                 for (int m = 0; m < obsDistance.size(); m++) {          sum += ((obs[m] - exp[m]) * (obs[m] - exp[m]));         }
544                         
545                 float de = sqrt((sum / (obsDistance.size() - 1)));
546                         
547                 return de;
548         }
549         catch(exception& e) {
550                 errorOut(e, "Pintail", "calcDE");
551                 exit(1);
552         }
553 }
554
555 //***************************************************************************************************************
556
557 vector<float> Pintail::calcFreq(vector<Sequence*> seqs) {
558         try {
559
560                 vector<float> prob;
561                 string freqfile = getRootName(templateFile) + "probability";
562                 ofstream outFreq;
563                 
564                 openOutputFile(freqfile, outFreq);
565                 
566                 //at each position in the sequence
567                 for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
568                         
569                         vector<int> freq;   freq.resize(4,0);
570                         int gaps = 0;
571                         
572                         //find the frequency of each nucleotide
573                         for (int j = 0; j < seqs.size(); j++) {
574                                 
575                                 char value = seqs[j]->getAligned()[i];
576                                 
577                                 if(toupper(value) == 'A')                                                                       {       freq[0]++;      }
578                                 else if(toupper(value) == 'T' || toupper(value) == 'U')         {       freq[1]++;      }
579                                 else if(toupper(value) == 'G')                                                          {       freq[2]++;      }
580                                 else if(toupper(value) == 'C')                                                          {       freq[3]++;      }
581                                 else { gaps++; }
582                         }
583                         
584                         //find base with highest frequency
585                         int highest = 0;
586                         for (int m = 0; m < freq.size(); m++) {   if (freq[m] > highest) {  highest = freq[m];  }               }
587                         
588                         float highFreq;
589                         if ( (seqs.size() - gaps) == 0 ) {  highFreq = 1.0;  }                  
590                         else { highFreq = highest / (float) (seqs.size() - gaps);        }
591                         //highFreq = highest / (float) seqs.size();
592                         //cout << i << '\t' << highFreq << endl;
593                         
594                         float Pi;
595                         Pi =  (highFreq - 0.25) / 0.75; 
596                         
597                         //cannot have probability less than 0.
598                         if (Pi < 0) { Pi = 0.0; }
599                         
600                         //saves this for later
601                         outFreq << i+1 << '\t' << Pi << endl;
602                         
603                         prob.push_back(Pi); 
604                 }
605                 
606                 outFreq.close();
607                 
608                 return prob;
609                                 
610         }
611         catch(exception& e) {
612                 errorOut(e, "Pintail", "calcFreq");
613                 exit(1);
614         }
615 }
616 //***************************************************************************************************************
617 vector<float>  Pintail::findQav(vector<int> window, int size) {
618         try {
619                 vector<float>  averages; 
620                                 
621                 //for each window find average
622                 for (int m = 0; m < windows.size(); m++) {
623                                 
624                         float average = 0.0;
625                                 
626                         //while you are in the window for this sequence
627                         for (int j = window[m]; j < (window[m]+size); j++) {   average += probabilityProfile[j];        }
628                                 
629                         average = average / size;
630         
631                         //save this windows average
632                         averages.push_back(average);
633                 }
634                                 
635                 return averages;
636         }
637         catch(exception& e) {
638                 errorOut(e, "Pintail", "findQav");
639                 exit(1);
640         }
641 }
642
643 //***************************************************************************************************************
644 vector< vector<float> > Pintail::getQuantiles(int start, int end) {
645         try {
646                 vector< vector<float> > quan; 
647                 
648                 //for each sequence
649                 for(int i = start; i < end; i++){
650                 
651                         Sequence* query = templateSeqs[i];
652                         
653                         //compare to every other sequence in template
654                         for(int j = 0; j < i; j++){
655                                 
656                                 Sequence subject = *(templateSeqs[j]);
657                                 
658                                 map<int, int> trim = trimSeqs(query, subject, -1);
659                                 
660                                 
661                                 
662                         
663                         
664                         }
665                         
666                         
667                         
668                 }
669                 return quan;
670                                                 
671         }
672         catch(exception& e) {
673                 errorOut(e, "Pintail", "findQav");
674                 exit(1);
675         }
676 }
677
678 //***************************************************************************************************************
679 float Pintail::getCoef(vector<float> obs, vector<float> qav) {
680         try {
681         
682                 //find average prob for this seqs windows
683                 float probAverage = 0.0;
684                 for (int j = 0; j < qav.size(); j++) {   probAverage += qav[j]; }
685                 probAverage = probAverage / (float) Qav.size();
686                 
687                 //find observed average 
688                 float obsAverage = 0.0;
689                 for (int j = 0; j < obs.size(); j++) {   obsAverage += obs[j];  }
690                 obsAverage = obsAverage / (float) obs.size();
691                 
692
693                 float coef = obsAverage / probAverage;
694                                                 
695                 return coef;
696         }
697         catch(exception& e) {
698                 errorOut(e, "Pintail", "getCoef");
699                 exit(1);
700         }
701 }
702 /**************************************************************************************************/
703
704 void Pintail::createProcessesSpots() {
705         try {
706 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
707                 int process = 0;
708                 vector<int> processIDS;
709                 vector< vector<int> > win; win.resize(querySeqs.size());
710                 
711                 //loop through and create all the processes you want
712                 while (process != processors) {
713                         int pid = fork();
714                         
715                         if (pid > 0) {
716                                 processIDS.push_back(pid);  
717                                 process++;
718                         }else if (pid == 0){
719                                 
720                                 vector<Sequence> tempbest;
721                                 tempbest = findPairs(lines[process]->start, lines[process]->end);
722                                 int count = 0;
723                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
724                                         bestfit[i] = tempbest[count];
725                                         
726                                         //chops off beginning and end of sequences so they both start and end with a base
727                                         trimSeqs(querySeqs[i], bestfit[i], i);
728                                         count++;
729                                 }
730                                 
731                                 
732                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
733                                         vector<int> temp = findWindows(querySeqs[i], it->first, it->second, windowSizes[i]);
734                                         win[i] = temp;
735                                 }
736                                 
737                                 exit(0);
738                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
739                 }
740                 
741                 //force parent to wait until all the processes are done
742                 for (int i=0;i<processors;i++) { 
743                         int temp = processIDS[i];
744                         wait(&temp);
745                 }
746                 
747                 windows = win;
748 #else
749                 bestfit = findPairs(lines[0]->start, lines[0]->end);
750                 for (int j = 0; j < bestfit.size(); j++) {
751                                 //chops off beginning and end of sequences so they both start and end with a base
752                                 trimSeqs(querySeqs[j], bestfit[j], j);  
753                 }
754
755                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
756                                 it = trimmed[i].begin();
757                                 map<int, int> win = findWindows(querySeqs[i], it->first, it->second, windowSizes[i]);
758                                 windows[i] = win;
759                 }
760
761 #endif          
762         }
763         catch(exception& e) {
764                 errorOut(e, "Pintail", "createProcessesSpots");
765                 exit(1);
766         }
767 }
768
769
770 /**************************************************************************************************/
771
772 void Pintail::createProcesses() {
773         try {
774 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
775                 int process = 0;
776                 vector<int> processIDS;
777                 
778                 vector< vector<float> > exp;  exp.resize(querySeqs.size());
779                 vector<float> de; de.resize(querySeqs.size());
780                 vector< vector<float> > obs; obs.resize(querySeqs.size());
781                 vector<float> dev; dev.resize(querySeqs.size());
782                 
783                 
784                 //loop through and create all the processes you want
785                 while (process != processors) {
786                         int pid = fork();
787                         
788                         if (pid > 0) {
789                                 processIDS.push_back(pid);  
790                                 process++;
791                         }else if (pid == 0){
792                                 
793                                 //calc obs
794                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
795                                         vector<float> obsi = calcObserved(querySeqs[i], bestfit[i], windows[i], windowSizes[i]);
796                                         obs[i] = obsi;
797                                 
798                                         //calc Qav
799                                         vector<float> q = findQav(windows[i], windowSizes[i]);
800                                         
801                                         //get alpha
802                                         float alpha = getCoef(obsDistance[i], q);
803                                         
804                                         //find expected
805                                         vector<float> exp = calcExpected(q, alpha);
806                                         expectedDistance[i] = exp;
807                                         
808                                         //get de and deviation
809                                         float dei = calcDE(obsi, exp);
810                                         de[i] = dei;
811                                         
812                                         it = trimmed[i].begin();
813                                         float dist = calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
814                                         dev[i] = dist;
815                                 }
816                                 
817                                 exit(0);
818                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
819                 }
820                 
821                 //force parent to wait until all the processes are done
822                 for (int i=0;i<processors;i++) { 
823                         int temp = processIDS[i];
824                         wait(&temp);
825                 }
826                 
827                 obsDistance = obs;
828                 expectedDistance = exp;
829                 DE = de;
830                 deviation = dev;
831                 
832 #else
833                         mothurOut("Calculating observed distance... "); cout.flush();
834                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
835                                 vector<float> obsi = calcObserved(querySeqs[i], bestfit[i], windows[i], windowSizes[i]);
836                                 obsDistance[i] = obsi;
837                         }
838                         mothurOut("Done."); mothurOutEndLine();
839                         
840                         
841                         
842                         mothurOut("Finding variability... "); cout.flush();
843                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
844                                 vector<float> q = findQav(windows[i], windowSizes[i]);
845                                 Qav[i] = q;
846                         }
847                         mothurOut("Done."); mothurOutEndLine();
848                         
849                         
850                         
851                         mothurOut("Calculating alpha... "); cout.flush();
852                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
853                                 float alpha = getCoef(obsDistance[i], Qav[i]);
854                                 seqCoef.push_back(alpha);
855                         }
856                         mothurOut("Done."); mothurOutEndLine();
857                 
858                 
859                 
860                         mothurOut("Calculating expected distance... "); cout.flush();
861                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
862                                 vector<float> exp = calcExpected(Qav[i], seqCoef[i]);
863                                 expectedDistance[i] = exp;
864                         }
865                         mothurOut("Done."); mothurOutEndLine();
866                         
867                         
868                         
869                         mothurOut("Finding deviation... "); cout.flush();
870                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
871                                 float de = calcDE(obsDistance[i], expectedDistance[i]);
872                                 DE[i] = de;
873                                 
874                                 it = trimmed[i].begin();
875                                 float dist = calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
876                                 deviation[i] = dist;
877                         }
878                         mothurOut("Done."); mothurOutEndLine();
879
880 #endif          
881         }
882         catch(exception& e) {
883                 errorOut(e, "Pintail", "createProcesses");
884                 exit(1);
885         }
886 }
887
888
889 /**************************************************************************************************/
890
891 void Pintail::createProcessesQuan() {
892         try {
893 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
894                 int process = 0;
895                 vector<int> processIDS;
896                                 
897                 //loop through and create all the processes you want
898                 while (process != processors) {
899                         int pid = fork();
900                         
901                         if (pid > 0) {
902                                 processIDS.push_back(pid);  
903                                 process++;
904                         }else if (pid == 0){
905                                 
906                                 
907                                 exit(0);
908                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
909                 }
910                 
911                 //force parent to wait until all the processes are done
912                 for (int i=0;i<processors;i++) { 
913                         int temp = processIDS[i];
914                         wait(&temp);
915                 }
916                 
917 #else
918
919 #endif          
920         }
921         catch(exception& e) {
922                 errorOut(e, "Pintail", "createProcessesQuan");
923                 exit(1);
924         }
925 }
926
927
928 //***************************************************************************************************************
929
930