]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
last changes before move
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12
13 //********************************************************************************************************************
14 //sorts lowest to highest
15 inline bool compareQuanMembers(quanMember left, quanMember right){
16         return (left.score < right.score);      
17
18 //***************************************************************************************************************
19
20 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
21 //***************************************************************************************************************
22
23 Pintail::~Pintail() {
24         try {
25                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
26                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
27                 for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
28         }
29         catch(exception& e) {
30                 errorOut(e, "Pintail", "~Pintail");
31                 exit(1);
32         }
33 }       
34 //***************************************************************************************************************
35 void Pintail::print(ostream& out) {
36         try {
37                 
38                 mothurOutEndLine();
39                 
40                 for (int i = 0; i < querySeqs.size(); i++) {
41                         
42                         int index = ceil(deviation[i]);
43                         
44                         //is your DE value higher than the 95%
45                         string chimera;
46                         if (DE[i] > quantiles[index][4])                {       chimera = "Yes";        }
47                         else                                                                    {       chimera = "No";         }
48                         
49                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
50                         if (chimera == "Yes") {
51                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
52                         }
53                         out << "Observed\t";
54                         
55                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
56                         out << endl;
57                         
58                         out << "Expected\t";
59                         
60                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
61                         out << endl;
62                         
63                 }
64         }
65         catch(exception& e) {
66                 errorOut(e, "Pintail", "print");
67                 exit(1);
68         }
69 }
70
71 //***************************************************************************************************************
72 void Pintail::getChimeras() {
73         try {
74                 
75                 //read in query sequences and subject sequences
76                 mothurOut("Reading sequences and template file... "); cout.flush();
77                 querySeqs = readSeqs(fastafile);
78                 templateSeqs = readSeqs(templateFile);
79                 mothurOut("Done."); mothurOutEndLine();
80                 
81                 int numSeqs = querySeqs.size();
82                 
83                 obsDistance.resize(numSeqs);
84                 expectedDistance.resize(numSeqs);
85                 seqCoef.resize(numSeqs);
86                 DE.resize(numSeqs);
87                 Qav.resize(numSeqs);
88                 bestfit.resize(numSeqs);
89                 deviation.resize(numSeqs);
90                 trimmed.resize(numSeqs);
91                 windowSizes.resize(numSeqs, window);
92                 windowSizesTemplate.resize(templateSeqs.size(), window);
93                 windowsForeachQuery.resize(numSeqs);
94                 h.resize(numSeqs);
95                 quantiles.resize(100);  //one for every percent mismatch
96                 quantilesMembers.resize(100);  //one for every percent mismatch
97                 
98                 //break up file if needed
99                 int linesPerProcess = numSeqs / processors ;
100                 
101                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
102                         //find breakup of sequences for all times we will Parallelize
103                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
104                         else {
105                                 //fill line pairs
106                                 for (int i = 0; i < (processors-1); i++) {                      
107                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
108                                 }
109                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
110                                 int i = processors - 1;
111                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
112                         }
113                         
114                         //find breakup of templatefile for quantiles
115                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
116                         else { 
117                                 for (int i = 0; i < processors; i++) {
118                                         templateLines.push_back(new linePair());
119                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
120                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
121                                 }
122                         }
123                 #else
124                         lines.push_back(new linePair(0, numSeqs));
125                         templateLines.push_back(new linePair(0, templateSeqs.size()));
126                 #endif
127                 
128                 distcalculator = new ignoreGaps();
129                 decalc = new DeCalculator();
130                 
131                 //if the user does enter a mask then you want to keep all the spots in the alignment
132                 if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
133                 else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
134                 
135                 decalc->setMask(seqMask);
136                 
137                 //find pairs
138                 if (processors == 1) { 
139                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
140                         bestfit = findPairs(lines[0]->start, lines[0]->end);
141                         mothurOut("Done."); mothurOutEndLine();
142                 }else {         createProcessesPairs();         }
143                 
144                 
145                 //find P
146                 mothurOut("Getting conservation... "); cout.flush();
147                 if (consfile == "") { 
148                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
149                         probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); 
150                         mothurOut("Done."); mothurOutEndLine();
151                 }else                           {   probabilityProfile = readFreq();                      }
152
153                 //make P into Q
154                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
155                 mothurOut("Done."); mothurOutEndLine();
156                 
157                 //mask sequences if the user wants to 
158                 if (seqMask != "") {
159                         //mask querys
160                         for (int i = 0; i < querySeqs.size(); i++) {
161                                 decalc->runMask(querySeqs[i]);
162                         }
163                 
164                         //mask templates
165                         for (int i = 0; i < templateSeqs.size(); i++) {
166                                 decalc->runMask(templateSeqs[i]);
167                         }
168                         
169                         for (int i = 0; i < bestfit.size(); i++) { 
170                                 decalc->runMask(bestfit[i]);
171                         }
172
173                 }
174                                                 
175                 if (processors == 1) { 
176         
177                         for (int j = 0; j < bestfit.size(); j++) { 
178                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
179                         }
180                         
181                         mothurOut("Finding window breaks... "); cout.flush();
182                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
183                                 it = trimmed[i].begin();
184                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
185                                 windowsForeachQuery[i] = win;
186                         }
187                         mothurOut("Done."); mothurOutEndLine();
188                 
189                 }else {         createProcessesSpots();         }
190                 
191                 if (processors == 1) { 
192                                                 
193                         mothurOut("Calculating observed distance... "); cout.flush();
194                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
195                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
196         
197                                 obsDistance[i] = obsi;
198                         }
199                         mothurOut("Done."); mothurOutEndLine();
200                         
201                         
202                         mothurOut("Finding variability... "); cout.flush();
203                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
204                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
205
206                                 Qav[i] = q;
207                         }
208                         mothurOut("Done."); mothurOutEndLine();
209                         
210                         
211                         mothurOut("Calculating alpha... "); cout.flush();
212                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
213                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
214                                 seqCoef[i] = alpha;
215                         }
216                         mothurOut("Done."); mothurOutEndLine();
217                 
218                 
219                         mothurOut("Calculating expected distance... "); cout.flush();
220                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
221                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
222                                 expectedDistance[i] = exp;
223                         }
224                         mothurOut("Done."); mothurOutEndLine();
225                         
226                         
227                         mothurOut("Finding deviation... "); cout.flush();
228                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
229                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
230                                 DE[i] = de;
231                         
232                                 it = trimmed[i].begin();
233                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
234                                 deviation[i] = dist;
235                         }
236                         mothurOut("Done."); mothurOutEndLine();
237                         
238                 } 
239                 else {          createProcesses();              }
240                 
241                 //quantiles are used to determine whether the de values found indicate a chimera
242                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
243                 //combination of sequences in the template
244                 if (quanfile != "") {  quantiles =  readQuantiles();  }
245                 else {
246                         
247                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
248                         if (processors == 1) { 
249                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
250                         }else {         createProcessesQuan();          }
251                         
252                         
253                                                 
254                         
255                         //decided against this because we were having trouble setting the sensitivity... may want to revisit this...
256                         //quantiles = decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
257                         
258                         ofstream out4;
259                         string o;
260                         
261                         o = getRootName(templateFile) + "quan";
262                         
263                         openOutputFile(o, out4);
264                         
265                         //adjust quantiles
266                         for (int i = 0; i < quantilesMembers.size(); i++) {
267                                 vector<float> temp;
268                                 
269                                 if (quantilesMembers[i].size() == 0) {
270                                         //in case this is not a distance found in your template files
271                                         for (int g = 0; g < 6; g++) {
272                                                 temp.push_back(0.0);
273                                         }
274                                 }else{
275                                         
276                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
277                                         
278                                         //save 10%
279                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
280                                         //save 25%
281                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
282                                         //save 50%
283                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
284                                         //save 75%
285                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
286                                         //save 95%
287                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
288                                         //save 99%
289                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
290                                         
291                                 }
292                                 
293                                 //output quan value
294                                 out4 << i+1 << '\t';                            
295                                 for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
296                                 out4 << endl;
297                                 
298                                 quantiles[i] = temp;
299                                 
300                         }
301                         
302                         out4.close();
303                         
304                         mothurOut("Done."); mothurOutEndLine();
305                         
306                 }
307                         
308                 //free memory
309                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
310                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
311                         
312                 delete distcalculator;
313                 delete decalc;
314         }
315         catch(exception& e) {
316                 errorOut(e, "Pintail", "getChimeras");
317                 exit(1);
318         }
319 }
320
321 //***************************************************************************************************************
322
323 vector<float> Pintail::readFreq() {
324         try {
325         
326                 ifstream in;
327                 openInputFile(consfile, in);
328                 
329                 vector<float> prob;
330                 set<int> h = decalc->getPos();  //positions of bases in masking sequence
331                 
332                 //read in probabilities and store in vector
333                 int pos; float num; 
334                 
335                 while(!in.eof()){
336                         
337                         in >> pos >> num;
338                         
339                         if (h.count(pos) > 0) {
340                                 float Pi;
341                                 Pi =  (num - 0.25) / 0.75; 
342                         
343                                 //cannot have probability less than 0.
344                                 if (Pi < 0) { Pi = 0.0; }
345
346                                 //do you want this spot
347                                 prob.push_back(Pi);  
348                         }
349                         
350                         gobble(in);
351                 }
352                 
353                 in.close();
354                 return prob;
355                 
356         }
357         catch(exception& e) {
358                 errorOut(e, "Pintail", "readFreq");
359                 exit(1);
360         }
361 }
362
363 //***************************************************************************************************************
364
365 vector< vector<float> > Pintail::readQuantiles() {
366         try {
367         
368                 ifstream in;
369                 openInputFile(quanfile, in);
370                 
371                 vector< vector<float> > quan;
372         
373                 int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
374                 
375                 while(!in.eof()){
376                         
377                         in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
378                         
379                         vector <float> temp;
380                         
381                         temp.push_back(ten); 
382                         temp.push_back(twentyfive);
383                         temp.push_back(fifty);
384                         temp.push_back(seventyfive);
385                         temp.push_back(ninetyfive);
386                         temp.push_back(ninetynine);
387                         
388                         quan.push_back(temp);  
389                         
390                         gobble(in);
391                 }
392                 
393                 in.close();
394                 return quan;
395                 
396         }
397         catch(exception& e) {
398                 errorOut(e, "Pintail", "readQuantiles");
399                 exit(1);
400         }
401 }
402 //***************************************************************************************************************
403 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
404 vector<Sequence*> Pintail::findPairs(int start, int end) {
405         try {
406                 
407                 vector<Sequence*> seqsMatches;  
408                 
409                 for(int i = start; i < end; i++){
410                 
411                         float smallest = 10000.0;
412                         Sequence query = *(querySeqs[i]);
413                         Sequence* match;
414                         
415                         for(int j = 0; j < templateSeqs.size(); j++){
416                                 
417                                 Sequence temp = *(templateSeqs[j]);
418                                 
419                                 distcalculator->calcDist(query, temp);
420                                 float dist = distcalculator->getDist();
421                                 
422                                 if (dist < smallest) { 
423                                         match = templateSeqs[j];
424                                         smallest = dist;
425                                 }
426                         }
427                         
428                         //make copy so trimSeqs doesn't overtrim
429                         Sequence* copy = new Sequence(match->getName(), match->getAligned());
430                         
431                         seqsMatches.push_back(copy);
432                 }
433                 
434                 return seqsMatches;
435         
436         }
437         catch(exception& e) {
438                 errorOut(e, "Pintail", "findPairs");
439                 exit(1);
440         }
441 }
442
443 /**************************************************************************************************/
444
445 void Pintail::createProcessesSpots() {
446         try {
447 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
448                 int process = 0;
449                 vector<int> processIDS;
450                 
451                 //loop through and create all the processes you want
452                 while (process != processors) {
453                         int pid = fork();
454                         
455                         if (pid > 0) {
456                                 processIDS.push_back(pid);  
457                                 process++;
458                         }else if (pid == 0){
459                                 
460                                 for (int j = lines[process]->start; j < lines[process]->end; j++) {
461                                 
462                                         //chops off beginning and end of sequences so they both start and end with a base
463                                         map<int, int> trim;
464
465                                         decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
466                                         trimmed[j] = trim;
467                                         
468                                 }
469
470                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
471                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
472                                         it = trimmed[i].begin();
473                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
474                                 }
475                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
476                                 
477                                 //write out data to file so parent can read it
478                                 ofstream out;
479                                 string s = toString(getpid()) + ".temp";
480                                 openOutputFile(s, out);
481                                 
482                                 //output windowsForeachQuery
483                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
484                                         out << windowsForeachQuery[i].size() << '\t';
485                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
486                                                 out << windowsForeachQuery[i][j] << '\t';
487                                         }
488                                         out << endl;
489                                 }
490                         
491                                 //output windowSizes
492                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
493                                         out << windowSizes[i] << '\t';
494                                 }
495                                 out << endl;
496                                 
497                                 //output trimmed values
498                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
499                                         it = trimmed[i].begin();                                        
500                                         out << it->first << '\t' << it->second << endl;
501                                 }
502                                 out.close();
503                                 
504                                 exit(0);
505                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
506                 }
507                 
508                 //force parent to wait until all the processes are done
509                 for (int i=0;i<processors;i++) { 
510                         int temp = processIDS[i];
511                         wait(&temp);
512                 }
513                 
514                 //get data created by processes
515                 for (int i=0;i<processors;i++) { 
516                         ifstream in;
517                         string s = toString(processIDS[i]) + ".temp";
518                         openInputFile(s, in);
519                         
520                         int size = lines[i]->end - lines[i]->start;
521                                         
522                         int count = lines[i]->start;
523                         for (int m = 0; m < size; m++) {
524                                 int num;
525                                 in >> num;
526                         
527                                 vector<int> win;  int w;
528                                 for (int j = 0; j < num; j++) {
529                                         in >> w;
530                                         win.push_back(w);
531                                 }
532                         
533                                 windowsForeachQuery[count] = win;
534                                 count++;
535                                 gobble(in);
536                         }
537                 
538                         gobble(in);
539                         count = lines[i]->start;
540                         for (int m = 0; m < size; m++) {
541                                 int num;
542                                 in >> num;
543                                 
544                                 windowSizes[count] = num;
545                                 count++;
546                         }
547                         
548                         gobble(in);
549                         
550                         count = lines[i]->start;
551                         for (int m = 0; m < size; m++) {
552                                 int front, back;
553                                 in >> front >> back;
554                         
555                                 map<int, int> t;
556                                 
557                                 t[front] = back;
558                                 
559                                 trimmed[count] = t;
560                                 count++;
561                                 
562                                 gobble(in);
563                         }
564
565                         
566                         in.close();
567                         remove(s.c_str());
568                 }
569                         
570         
571 #else
572                 for (int j = 0; j < bestfit.size(); j++) {
573                         //chops off beginning and end of sequences so they both start and end with a base
574                         decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
575                 }
576
577                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
578                                 it = trimmed[i].begin();
579                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
580                                 windowsForeachQuery[i] = win;
581                 }
582
583 #endif          
584         }
585         catch(exception& e) {
586                 errorOut(e, "Pintail", "createProcessesSpots");
587                 exit(1);
588         }
589 }
590 /**************************************************************************************************/
591
592 void Pintail::createProcessesPairs() {
593         try {
594 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
595                 int process = 0;
596                 vector<int> processIDS;
597                 
598                 //loop through and create all the processes you want
599                 while (process != processors) {
600                         int pid = fork();
601                         
602                         if (pid > 0) {
603                                 processIDS.push_back(pid);  
604                                 process++;
605                         }else if (pid == 0){
606                                 
607                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
608                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
609                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
610                                 
611                                 //write out data to file so parent can read it
612                                 ofstream out;
613                                 string s = toString(getpid()) + ".temp";
614                                 openOutputFile(s, out);
615                                 
616                                 //output range and size
617                                 out << bestfit.size() << endl;
618                                 
619                                 //output pairs
620                                 for (int i = 0; i < bestfit.size(); i++) {
621                                         out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
622                                 }
623                                 out.close();
624                                 
625                                 exit(0);
626                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
627                 }
628                 
629                 //force parent to wait until all the processes are done
630                 for (int i=0;i<processors;i++) { 
631                         int temp = processIDS[i];
632                         wait(&temp);
633                 }
634                 
635                 //get data created by processes
636                 for (int i=0;i<processors;i++) { 
637                         ifstream in;
638                         string s = toString(processIDS[i]) + ".temp";
639                         openInputFile(s, in);
640                         
641                         int size;
642                         in >> size;  gobble(in);
643                                 
644                         //get pairs
645                         int count = lines[i]->start;
646                         for (int m = 0; m < size; m++) {
647                                 Sequence* temp = new Sequence(in);
648                                 bestfit[count] = temp;
649                         
650                                 count++;
651                                 gobble(in);
652                         }
653                         
654                         in.close();
655                         remove(s.c_str());
656                 }
657                         
658         
659 #else
660                 bestfit = findPairs(lines[0]->start, lines[0]->end);
661 #endif          
662         }
663         catch(exception& e) {
664                 errorOut(e, "Pintail", "createProcessesPairs");
665                 exit(1);
666         }
667 }
668 /**************************************************************************************************/
669
670 void Pintail::createProcesses() {
671         try {
672 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
673                 int process = 0;
674                 vector<int> processIDS;
675                 
676                 //loop through and create all the processes you want
677                 while (process != processors) {
678                         int pid = fork();
679                         
680                         if (pid > 0) {
681                                 processIDS.push_back(pid);  
682                                 process++;
683                         }else if (pid == 0){
684                                 
685                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
686                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
687                                         
688                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
689                                         obsDistance[i] = obsi;
690                                 
691                                         //calc Qav
692                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
693                                         
694                                         //get alpha
695                                         float alpha = decalc->getCoef(obsDistance[i], q);
696                                         
697                                         //find expected
698                                         vector<float> exp = decalc->calcExpected(q, alpha);
699                                         expectedDistance[i] = exp;
700                                         
701                                         //get de and deviation
702                                         float dei = decalc->calcDE(obsi, exp);
703                                         DE[i] = dei;
704                                         
705                                         it = trimmed[i].begin();
706                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
707                                         deviation[i] = dist;
708                                 }
709                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
710                                 
711                                 //write out data to file so parent can read it
712                                 ofstream out;
713                                 string s = toString(getpid()) + ".temp";
714                                 openOutputFile(s, out);
715                                 
716                                 int size = lines[process]->end - lines[process]->start;
717                                 out << size << endl;
718                                                                 
719                                 //output observed distances
720                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
721                                         out << obsDistance[i].size() << '\t';
722                                         for (int j = 0; j < obsDistance[i].size(); j++) {
723                                                 out << obsDistance[i][j] << '\t';
724                                         }
725                                         out << endl;
726                                 }
727                                 
728                                 
729                                 //output expected distances
730                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
731                                         out << expectedDistance[i].size() << '\t';
732                                         for (int j = 0; j < expectedDistance[i].size(); j++) {
733                                                 out << expectedDistance[i][j] << '\t';
734                                         }
735                                         out << endl;
736                                 }
737
738                         
739                                 //output de values
740                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
741                                         out << DE[i] << '\t';
742                                 }
743                                 out << endl;    
744                                 
745                                 //output de values
746                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
747                                         out << deviation[i] << '\t';
748                                 }
749                                 out << endl;
750                                 
751                                 out.close();
752
753                                 exit(0);
754                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
755                 }
756                 
757                 //force parent to wait until all the processes are done
758                 for (int i=0;i<processors;i++) { 
759                         int temp = processIDS[i];
760                         wait(&temp);
761                 }
762                 
763                 //get data created by processes
764                 for (int i=0;i<processors;i++) { 
765                         ifstream in;
766                         string s = toString(processIDS[i]) + ".temp";
767                         openInputFile(s, in);
768                         
769                         int size;
770                         in >> size;  gobble(in);
771                         
772                         //get observed distances
773                         int count = lines[i]->start;
774                         for (int m = 0; m < size; m++) {
775                                 int num;
776                                 in >> num;
777                         
778                                 vector<float> obs;  float w;
779                                 for (int j = 0; j < num; j++) {
780                                         in >> w;
781                                         obs.push_back(w);
782                                 }
783                         
784                                 obsDistance[count] = obs;
785                                 count++;
786                                 gobble(in);
787                         }
788                         
789                         gobble(in);
790                         
791                         //get expected distances
792                         count = lines[i]->start;
793                         for (int m = 0; m < size; m++) {
794                                 int num;
795                                 in >> num;
796                         
797                                 vector<float> exp;  float w;
798                                 for (int j = 0; j < num; j++) {
799                                         in >> w;
800                                         exp.push_back(w);
801                                 }
802                         
803                                 expectedDistance[count] = exp;
804                                 count++;
805                                 gobble(in);
806                         }
807
808                         gobble(in);
809                         
810                         count = lines[i]->start;
811                         for (int m = 0; m < size; m++) {
812                                 float num;
813                                 in >> num;
814                                 
815                                 DE[count] = num;
816                                 count++;
817                         }
818                         
819                         gobble(in);
820                         
821                         count = lines[i]->start;
822                         for (int m = 0; m < size; m++) {
823                                 float num;
824                                 in >> num;
825                                 
826                                 deviation[count] = num;
827                                 count++;
828                         }
829
830                         in.close();
831                         remove(s.c_str());
832                 }
833
834                                 
835 #else
836                         mothurOut("Calculating observed distance... "); cout.flush();
837                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
838                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
839                                 obsDistance[i] = obsi;
840                         }
841                         mothurOut("Done."); mothurOutEndLine();
842                         
843                         
844                         
845                         mothurOut("Finding variability... "); cout.flush();
846                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
847                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
848                                 Qav[i] = q;
849                         }
850                         mothurOut("Done."); mothurOutEndLine();
851                         
852                         
853                         
854                         mothurOut("Calculating alpha... "); cout.flush();
855                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
856                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
857                                 seqCoef.push_back(alpha);
858                         }
859                         mothurOut("Done."); mothurOutEndLine();
860                 
861                 
862                 
863                         mothurOut("Calculating expected distance... "); cout.flush();
864                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
865                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
866                                 expectedDistance[i] = exp;
867                         }
868                         mothurOut("Done."); mothurOutEndLine();
869                         
870                         
871                         
872                         mothurOut("Finding deviation... "); cout.flush();
873                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
874                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
875                                 DE[i] = de;
876                                 
877                                 it = trimmed[i].begin();
878                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
879                                 deviation[i] = dist;
880                         }
881                         mothurOut("Done."); mothurOutEndLine();
882
883 #endif          
884         }
885         catch(exception& e) {
886                 errorOut(e, "Pintail", "createProcesses");
887                 exit(1);
888         }
889 }
890
891
892 /**************************************************************************************************/
893
894 void Pintail::createProcessesQuan() {
895         try {
896 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
897                 int process = 0;
898                 vector<int> processIDS;
899                                 
900                 //loop through and create all the processes you want
901                 while (process != processors) {
902                         int pid = fork();
903                         
904                         if (pid > 0) {
905                                 processIDS.push_back(pid);  
906                                 process++;
907                         }else if (pid == 0){
908                                 
909                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end);
910                                 
911                                 //write out data to file so parent can read it
912                                 ofstream out;
913                                 string s = toString(getpid()) + ".temp";
914                                 openOutputFile(s, out);
915                                 
916                                                                 
917                                 //output observed distances
918                                 for (int i = 0; i < quantilesMembers.size(); i++) {
919                                         out << quantilesMembers[i].size() << '\t';
920                                         for (int j = 0; j < quantilesMembers[i].size(); j++) {
921                                                 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
922                                         }
923                                         out << endl;
924                                 }
925                                 
926                                 out.close();
927                                 
928                                 exit(0);
929                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
930                 }
931                 
932                 //force parent to wait until all the processes are done
933                 for (int i=0;i<processors;i++) { 
934                         int temp = processIDS[i];
935                         wait(&temp);
936                 }
937
938                 //get data created by processes
939                 for (int i=0;i<processors;i++) { 
940                         ifstream in;
941                         string s = toString(processIDS[i]) + ".temp";
942                         openInputFile(s, in);
943                         
944                         vector< vector<quanMember> > quan; 
945                         quan.resize(100);
946                         
947                         //get quantiles
948                         for (int m = 0; m < quan.size(); m++) {
949                                 int num;
950                                 in >> num; 
951                                 
952                                 gobble(in);
953
954                                 vector<quanMember> q;  float w; int b, n;
955                                 for (int j = 0; j < num; j++) {
956                                         in >> w >> b >> n;
957         //cout << w << '\t' << b << '\t' n << endl;
958                                         quanMember newMember(w, b, n);
959                                         q.push_back(newMember);
960                                 }
961 //cout << "here" << endl;
962                                 quan[m] = q;
963 //cout << "now here" << endl;
964                                 gobble(in);
965                         }
966                         
967         
968                         //save quan in quantiles
969                         for (int j = 0; j < quan.size(); j++) {
970                                 //put all values of q[i] into quan[i]
971                                 for (int l = 0; l < quan[j].size(); l++) {  quantilesMembers[j].push_back(quan[j][l]);   }
972                                 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
973                         }
974                                         
975                         in.close();
976                         remove(s.c_str());
977                 }
978                 
979 #else
980                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
981 #endif          
982         }
983         catch(exception& e) {
984                 errorOut(e, "Pintail", "createProcessesQuan");
985                 exit(1);
986         }
987 }
988
989
990 //***************************************************************************************************************
991
992