]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
Thallinger changes to cluster command.
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12
13 //***************************************************************************************************************
14
15 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
16 //***************************************************************************************************************
17
18 Pintail::~Pintail() {
19         try {
20                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
21                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
22                 
23                 if (processors != 1) {   for (int i = 0; i < bestfit.size(); i++)       {  delete bestfit[i];   }  }
24         }
25         catch(exception& e) {
26                 errorOut(e, "Pintail", "~Pintail");
27                 exit(1);
28         }
29 }       
30 //***************************************************************************************************************
31 void Pintail::print(ostream& out) {
32         try {
33                 
34                 for (int i = 0; i < querySeqs.size(); i++) {
35                         
36                         int index = ceil(deviation[i]);
37 float quan = 2.64 * log10(deviation[i]) + 1.46;
38 cout << "dist = " << index << endl;
39 cout << "de = "  << DE[i] << endl;
40 cout << "mallard quantile = " << quan << endl;
41 cout << "my quantile = " << quantiles[index][4] << endl;
42                         
43                         //is your DE value higher than the 95%
44                         string chimera;
45                         if (DE[i] > quantiles[index][4])        {       chimera = "Yes";        }
46                         else                                                            {       chimera = "No";         }
47                         
48                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
49                         if (chimera == "Yes") {
50                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
51                         }
52                         out << "Observed\t";
53                         
54                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
55                         out << endl;
56                         
57                         out << "Expected\t";
58                         
59                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
60                         out << endl;
61                         
62                 }
63         }
64         catch(exception& e) {
65                 errorOut(e, "Pintail", "print");
66                 exit(1);
67         }
68 }
69
70 //***************************************************************************************************************
71 void Pintail::getChimeras() {
72         try {
73                 
74                 //read in query sequences and subject sequences
75                 mothurOut("Reading sequences and template file... "); cout.flush();
76                 querySeqs = readSeqs(fastafile);
77                 templateSeqs = readSeqs(templateFile);
78                 mothurOut("Done."); mothurOutEndLine();
79                 
80                 int numSeqs = querySeqs.size();
81                 
82                 obsDistance.resize(numSeqs);
83                 expectedDistance.resize(numSeqs);
84                 seqCoef.resize(numSeqs);
85                 DE.resize(numSeqs);
86                 Qav.resize(numSeqs);
87                 bestfit.resize(numSeqs);
88                 deviation.resize(numSeqs);
89                 trimmed.resize(numSeqs);
90                 windowSizes.resize(numSeqs, window);
91                 windowSizesTemplate.resize(templateSeqs.size(), window);
92                 windowsForeachQuery.resize(numSeqs);
93                 h.resize(numSeqs);
94                 quantiles.resize(100);  //one for every percent mismatch
95                 
96                 //break up file if needed
97                 int linesPerProcess = numSeqs / processors ;
98                 
99                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
100                         //find breakup of sequences for all times we will Parallelize
101                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
102                         else {
103                                 //fill line pairs
104                                 for (int i = 0; i < (processors-1); i++) {                      
105                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
106                                 }
107                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
108                                 int i = processors - 1;
109                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
110                         }
111                         
112                         //find breakup of templatefile for quantiles
113                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
114                         else { 
115                                 for (int i = 0; i < processors; i++) {
116                                         templateLines.push_back(new linePair());
117                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
118                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
119                                 }
120                         }
121                 #else
122                         lines.push_back(new linePair(0, numSeqs));
123                         templateLines.push_back(new linePair(0, templateSeqs.size()));
124                 #endif
125                 
126                 distcalculator = new ignoreGaps();
127                 decalc = new DeCalculator();
128                 
129                 decalc->setMask(seqMask);
130                 
131                 //find pairs
132                 if (processors == 1) { 
133                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
134                         bestfit = findPairs(lines[0]->start, lines[0]->end);
135                         mothurOut("Done."); mothurOutEndLine();
136                 }else {         createProcessesPairs();         }
137                 
138                 
139                 for (int j = 0; j < bestfit.size(); j++) { 
140                                 //chops off beginning and end of sequences so they both start and end with a base
141                                 ofstream out;
142                                 string s = querySeqs[j]->getName();
143                                 
144                                 openOutputFile(s, out);
145                                 out << ">" << querySeqs[j]->getName() << endl;
146                                 out << querySeqs[j]->getAligned() << endl;
147                                 out.close();
148                                 
149                                 string t =querySeqs[j]->getName() + ".ref";
150                                 openOutputFile(t, out);
151                                 out << ">" << bestfit[j]->getName() << endl;
152                                 out << bestfit[j]->getAligned() << endl;
153                                 out.close();
154                 }
155
156                 
157                 //find P
158                 mothurOut("Getting conservation... "); cout.flush();
159                 if (consfile == "") { 
160                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
161                         probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); 
162                         mothurOut("Done."); mothurOutEndLine();
163                 }else                           {   probabilityProfile = readFreq();                      }
164
165                 //make P into Q
166                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];   }  //cout << i << '\t' << probabilityProfile[i] << endl;
167                 mothurOut("Done."); mothurOutEndLine();
168                 
169                 //mask querys
170                 for (int i = 0; i < querySeqs.size(); i++) {
171                 //cout << querySeqs[i]->getName() << " before mask = " << querySeqs[i]->getAligned() << endl << endl;
172                         decalc->runMask(querySeqs[i]);
173                 //cout << querySeqs[i]->getName() << " after mask = " << querySeqs[i]->getAligned() << endl << endl;
174                 }
175                 
176                 //mask templates
177                 for (int i = 0; i < templateSeqs.size(); i++) {
178                         decalc->runMask(templateSeqs[i]);
179                 }
180                 
181 //for (int i = 0; i < lines.size(); i++) { cout << "line pair " << i << " = " << lines[i]->start << '\t' << lines[i]->end << endl;  }
182                                 
183                 if (processors == 1) { 
184         
185                         for (int j = 0; j < bestfit.size(); j++) { 
186                         cout << querySeqs[j]->getName() << " after mask = " << querySeqs[j]->getAligned() << endl << endl;
187                         cout << bestfit[j]->getName() << " after mask = " << bestfit[j]->getAligned() << endl << endl;
188                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
189                         }
190                         
191                         mothurOut("Finding window breaks... "); cout.flush();
192                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
193                                 it = trimmed[i].begin();
194 cout << i << '\t' << "trimmed = " << it->first << '\t' << it->second << endl;
195                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
196                                 windowsForeachQuery[i] = win;
197                         }
198                         mothurOut("Done."); mothurOutEndLine();
199                 
200                 }else {         createProcessesSpots();         }
201
202                 
203                 if (processors == 1) { 
204                                                 
205                         mothurOut("Calculating observed distance... "); cout.flush();
206                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
207         cout << querySeqs[i]->getName() << '\t' << bestfit[i]->getName() << " windows = " << windowsForeachQuery[i].size() << " size = " << windowSizes[i] << endl;
208                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
209         
210         for (int j = 0; j < obsi.size(); j++) {
211                 cout << obsi[j] << '\t';
212         }
213         cout << endl;
214                                 obsDistance[i] = obsi;
215                         }
216                         mothurOut("Done."); mothurOutEndLine();
217                         
218                         
219                         mothurOut("Finding variability... "); cout.flush();
220                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
221                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
222
223                                 Qav[i] = q;
224 cout << i+1 << endl;
225 for (int j = 0; j < Qav[i].size(); j++) {
226         cout << Qav[i][j] << '\t';
227 }
228 cout << endl << endl;
229
230                         }
231                         mothurOut("Done."); mothurOutEndLine();
232                         
233                         
234                         mothurOut("Calculating alpha... "); cout.flush();
235                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
236                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
237 cout << i+1 << "\tcoef = " << alpha << endl;
238                                 seqCoef[i] = alpha;
239                         }
240                         mothurOut("Done."); mothurOutEndLine();
241                 
242                 
243                         mothurOut("Calculating expected distance... "); cout.flush();
244                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
245                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
246                                 expectedDistance[i] = exp;
247                         }
248                         mothurOut("Done."); mothurOutEndLine();
249                         
250                         
251                         mothurOut("Finding deviation... "); cout.flush();
252                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
253                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
254                                 DE[i] = de;
255 cout << querySeqs[i]->getName() << '\t' << "de value = " << de << endl;                         
256                                 it = trimmed[i].begin();
257                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
258 cout << querySeqs[i]->getName() << '\t' << "dist value = " << dist << endl;
259                                 deviation[i] = dist;
260                         }
261                         mothurOut("Done."); mothurOutEndLine();
262                         
263                 } 
264                 else {          createProcesses();              }
265                 
266                 
267                 //quantiles are used to determine whether the de values found indicate a chimera
268                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
269                 //combination of sequences in the template
270                 if (quanfile != "") {  quantiles =  readQuantiles();  }
271                 else {
272                         
273                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
274                         if (processors == 1) { 
275                                 quantiles = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
276                         }else {         createProcessesQuan();          }
277                         
278                         
279                         decalc->removeObviousOutliers(quantiles);
280                         
281                         ofstream out4;
282                         string o = getRootName(templateFile) + "quan";
283                         
284                         openOutputFile(o, out4);
285                         
286                         //adjust quantiles
287                         for (int i = 0; i < quantiles.size(); i++) {
288                                 if (quantiles[i].size() == 0) {
289                                         //in case this is not a distance found in your template files
290                                         for (int g = 0; g < 6; g++) {
291                                                 quantiles[i].push_back(0.0);
292                                         }
293                                 }else{
294                                         
295                                         sort(quantiles[i].begin(), quantiles[i].end());
296                                         
297                                         vector<float> temp;
298                                         //save 10%
299                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.10)]);
300                                         //save 25%
301                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.25)]);
302                                         //save 50%
303                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.5)]);
304                                         //save 75%
305                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.75)]);
306                                         //save 95%
307                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.95)]);
308                                         //save 99%
309                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.99)]);
310                                         
311                                         quantiles[i] = temp;
312                                 }
313                                 
314                                 //output quan value
315                                 out4 << i+1 << '\t';                            
316                                 for (int u = 0; u < quantiles[i].size(); u++) {   out4 << quantiles[i][u] << '\t'; }
317                                 out4 << endl;
318
319                         }
320                         
321                         mothurOut("Done."); mothurOutEndLine();
322                 }
323         
324                 //free memory
325                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
326                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
327                         
328                 delete distcalculator;
329                 delete decalc;
330         }
331         catch(exception& e) {
332                 errorOut(e, "Pintail", "getChimeras");
333                 exit(1);
334         }
335 }
336
337 //***************************************************************************************************************
338
339 vector<float> Pintail::readFreq() {
340         try {
341         
342                 ifstream in;
343                 openInputFile(consfile, in);
344                 
345                 vector<float> prob;
346                 set<int> h = decalc->getPos();  //positions of bases in masking sequence
347                 
348                 //read in probabilities and store in vector
349                 int pos; float num; 
350                 
351                 while(!in.eof()){
352                         
353                         in >> pos >> num;
354                         
355                         if (h.count(pos) > 0) {
356                                 float Pi;
357                                 Pi =  (num - 0.25) / 0.75; 
358                         
359                                 //cannot have probability less than 0.
360                                 if (Pi < 0) { Pi = 0.0; }
361
362                                 //do you want this spot
363                                 prob.push_back(Pi);  
364                         }
365                         
366                         gobble(in);
367                 }
368                 
369                 in.close();
370                 return prob;
371                 
372         }
373         catch(exception& e) {
374                 errorOut(e, "Pintail", "readFreq");
375                 exit(1);
376         }
377 }
378
379 //***************************************************************************************************************
380
381 vector< vector<float> > Pintail::readQuantiles() {
382         try {
383         
384                 ifstream in;
385                 openInputFile(quanfile, in);
386                 
387                 vector< vector<float> > quan;
388         
389                 int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
390                 
391                 while(!in.eof()){
392                         
393                         in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
394                         
395                         vector <float> temp;
396                         
397                         temp.push_back(ten); 
398                         temp.push_back(twentyfive);
399                         temp.push_back(fifty);
400                         temp.push_back(seventyfive);
401                         temp.push_back(ninetyfive);
402                         temp.push_back(ninetynine);
403                         
404                         quan.push_back(temp);  
405                         
406                         gobble(in);
407                 }
408                 
409                 in.close();
410                 return quan;
411                 
412         }
413         catch(exception& e) {
414                 errorOut(e, "Pintail", "readQuantiles");
415                 exit(1);
416         }
417 }
418 //***************************************************************************************************************
419 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
420 vector<Sequence*> Pintail::findPairs(int start, int end) {
421         try {
422                 
423                 vector<Sequence*> seqsMatches;  
424                 
425                 for(int i = start; i < end; i++){
426                 
427                         float smallest = 10000.0;
428                         Sequence query = *(querySeqs[i]);
429                         Sequence* match;
430                         
431                         for(int j = 0; j < templateSeqs.size(); j++){
432                                 
433                                 Sequence temp = *(templateSeqs[j]);
434                                 
435                                 distcalculator->calcDist(query, temp);
436                                 float dist = distcalculator->getDist();
437                                 
438                                 if (dist < smallest) { 
439                                         match = templateSeqs[j];
440                                         smallest = dist;
441                                 }
442                         }
443                         
444                         seqsMatches.push_back(match);
445                 }
446                 
447                 return seqsMatches;
448         
449         }
450         catch(exception& e) {
451                 errorOut(e, "Pintail", "findPairs");
452                 exit(1);
453         }
454 }
455
456 /**************************************************************************************************/
457
458 void Pintail::createProcessesSpots() {
459         try {
460 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
461                 int process = 0;
462                 vector<int> processIDS;
463                 
464                 //loop through and create all the processes you want
465                 while (process != processors) {
466                         int pid = fork();
467                         
468                         if (pid > 0) {
469                                 processIDS.push_back(pid);  
470                                 process++;
471                         }else if (pid == 0){
472                                 
473                                 for (int j = lines[process]->start; j < lines[process]->end; j++) {
474                                 
475                                         //chops off beginning and end of sequences so they both start and end with a base
476                                         map<int, int> trim;
477                                         decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
478                                         trimmed[j] = trim;
479                                         
480                                 }
481
482                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
483                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
484                                         it = trimmed[i].begin();
485                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
486                                 }
487                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
488                                 
489                                 //write out data to file so parent can read it
490                                 ofstream out;
491                                 string s = toString(getpid()) + ".temp";
492                                 openOutputFile(s, out);
493                                 
494                                 //output windowsForeachQuery
495                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
496                                         out << windowsForeachQuery[i].size() << '\t';
497                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
498                                                 out << windowsForeachQuery[i][j] << '\t';
499                                         }
500                                         out << endl;
501                                 }
502                         
503                                 //output windowSizes
504                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
505                                         out << windowSizes[i] << '\t';
506                                 }
507                                 out << endl;
508                                 
509                                 //output trimmed values
510                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
511                                         it = trimmed[i].begin();
512                                         
513                                         out << it->first << '\t' << it->second << endl;
514                                 }
515                                 out.close();
516                                 
517                                 exit(0);
518                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
519                 }
520                 
521                 //force parent to wait until all the processes are done
522                 for (int i=0;i<processors;i++) { 
523                         int temp = processIDS[i];
524                         wait(&temp);
525                 }
526                 
527                 //get data created by processes
528                 for (int i=0;i<processors;i++) { 
529                         ifstream in;
530                         string s = toString(processIDS[i]) + ".temp";
531                         openInputFile(s, in);
532                         
533                         int size = lines[i]->end - lines[i]->start;
534                                         
535                         int count = lines[i]->start;
536                         for (int m = 0; m < size; m++) {
537                                 int num;
538                                 in >> num;
539                         
540                                 vector<int> win;  int w;
541                                 for (int j = 0; j < num; j++) {
542                                         in >> w;
543                                         win.push_back(w);
544                                 }
545                         
546                                 windowsForeachQuery[count] = win;
547                                 count++;
548                                 gobble(in);
549                         }
550                 
551                         gobble(in);
552                         count = lines[i]->start;
553                         for (int m = 0; m < size; m++) {
554                                 int num;
555                                 in >> num;
556                                 
557                                 windowSizes[count] = num;
558                                 count++;
559                         }
560                         
561                         gobble(in);
562                         
563                         count = lines[i]->start;
564                         for (int m = 0; m < size; m++) {
565                                 int front, back;
566                                 in >> front >> back;
567                                 
568                                 map<int, int> t;
569                                 
570                                 t[front] = back;
571                                 
572                                 trimmed[count] = t;
573                                 count++;
574                                 
575                                 gobble(in);
576                         }
577
578                         
579                         in.close();
580                         remove(s.c_str());
581                 }
582                         
583         
584 #else
585                 for (int j = 0; j < bestfit.size(); j++) {
586                         //chops off beginning and end of sequences so they both start and end with a base
587                         decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
588                 }
589
590                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
591                                 it = trimmed[i].begin();
592                                 map<int, int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
593                                 windows[i] = win;
594                 }
595
596 #endif          
597         }
598         catch(exception& e) {
599                 errorOut(e, "Pintail", "createProcessesSpots");
600                 exit(1);
601         }
602 }
603 /**************************************************************************************************/
604
605 void Pintail::createProcessesPairs() {
606         try {
607 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
608                 int process = 0;
609                 vector<int> processIDS;
610                 
611                 //loop through and create all the processes you want
612                 while (process != processors) {
613                         int pid = fork();
614                         
615                         if (pid > 0) {
616                                 processIDS.push_back(pid);  
617                                 process++;
618                         }else if (pid == 0){
619                                 
620                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
621                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
622                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
623                                 
624                                 //write out data to file so parent can read it
625                                 ofstream out;
626                                 string s = toString(getpid()) + ".temp";
627                                 openOutputFile(s, out);
628                                 
629                                 //output range and size
630                                 out << bestfit.size() << endl;
631                                 
632                                 //output pairs
633                                 for (int i = 0; i < bestfit.size(); i++) {
634                                         out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
635                                 }
636                                 out.close();
637                                 
638                                 exit(0);
639                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
640                 }
641                 
642                 //force parent to wait until all the processes are done
643                 for (int i=0;i<processors;i++) { 
644                         int temp = processIDS[i];
645                         wait(&temp);
646                 }
647                 
648                 //get data created by processes
649                 for (int i=0;i<processors;i++) { 
650                         ifstream in;
651                         string s = toString(processIDS[i]) + ".temp";
652                         openInputFile(s, in);
653                         
654                         int size;
655                         in >> size;  gobble(in);
656                                 
657                         //get pairs
658                         int count = lines[i]->start;
659                         for (int m = 0; m < size; m++) {
660                                 Sequence* temp = new Sequence(in);
661                                 bestfit[count] = temp;
662                         
663                                 count++;
664                                 gobble(in);
665                         }
666                         
667                         in.close();
668                         remove(s.c_str());
669                 }
670                         
671         
672 #else
673                 bestfit = findPairs(lines[0]->start, lines[0]->end);
674 #endif          
675         }
676         catch(exception& e) {
677                 errorOut(e, "Pintail", "createProcessesPairs");
678                 exit(1);
679         }
680 }
681 /**************************************************************************************************/
682
683 void Pintail::createProcesses() {
684         try {
685 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
686                 int process = 0;
687                 vector<int> processIDS;
688                 
689                 //loop through and create all the processes you want
690                 while (process != processors) {
691                         int pid = fork();
692                         
693                         if (pid > 0) {
694                                 processIDS.push_back(pid);  
695                                 process++;
696                         }else if (pid == 0){
697                                 
698                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
699                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
700                                         
701                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
702                                         obsDistance[i] = obsi;
703                                 
704                                         //calc Qav
705                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
706                                         
707                                         //get alpha
708                                         float alpha = decalc->getCoef(obsDistance[i], q);
709                                         
710                                         //find expected
711                                         vector<float> exp = decalc->calcExpected(q, alpha);
712                                         expectedDistance[i] = exp;
713                                         
714                                         //get de and deviation
715                                         float dei = decalc->calcDE(obsi, exp);
716                                         DE[i] = dei;
717                                         
718                                         it = trimmed[i].begin();
719                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
720                                         deviation[i] = dist;
721                                 }
722                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
723                                 
724                                 //write out data to file so parent can read it
725                                 ofstream out;
726                                 string s = toString(getpid()) + ".temp";
727                                 openOutputFile(s, out);
728                                 
729                                 int size = lines[process]->end - lines[process]->start;
730                                 out << size << endl;
731                                                                 
732                                 //output observed distances
733                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
734                                         out << obsDistance[i].size() << '\t';
735                                         for (int j = 0; j < obsDistance[i].size(); j++) {
736                                                 out << obsDistance[i][j] << '\t';
737                                         }
738                                         out << endl;
739                                 }
740                                 
741                                 
742                                 //output expected distances
743                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
744                                         out << expectedDistance[i].size() << '\t';
745                                         for (int j = 0; j < expectedDistance[i].size(); j++) {
746                                                 out << expectedDistance[i][j] << '\t';
747                                         }
748                                         out << endl;
749                                 }
750
751                         
752                                 //output de values
753                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
754                                         out << DE[i] << '\t';
755                                 }
756                                 out << endl;    
757                                 
758                                 //output de values
759                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
760                                         out << deviation[i] << '\t';
761                                 }
762                                 out << endl;
763                                 
764                                 out.close();
765
766                                 exit(0);
767                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
768                 }
769                 
770                 //force parent to wait until all the processes are done
771                 for (int i=0;i<processors;i++) { 
772                         int temp = processIDS[i];
773                         wait(&temp);
774                 }
775                 
776                 //get data created by processes
777                 for (int i=0;i<processors;i++) { 
778                         ifstream in;
779                         string s = toString(processIDS[i]) + ".temp";
780                         openInputFile(s, in);
781                         
782                         int size;
783                         in >> size;  gobble(in);
784                         
785                         //get observed distances
786                         int count = lines[i]->start;
787                         for (int m = 0; m < size; m++) {
788                                 int num;
789                                 in >> num;
790                         
791                                 vector<float> obs;  float w;
792                                 for (int j = 0; j < num; j++) {
793                                         in >> w;
794                                         obs.push_back(w);
795                                 }
796                         
797                                 obsDistance[count] = obs;
798                                 count++;
799                                 gobble(in);
800                         }
801                         
802                         gobble(in);
803                         
804                         //get expected distances
805                         count = lines[i]->start;
806                         for (int m = 0; m < size; m++) {
807                                 int num;
808                                 in >> num;
809                         
810                                 vector<float> exp;  float w;
811                                 for (int j = 0; j < num; j++) {
812                                         in >> w;
813                                         exp.push_back(w);
814                                 }
815                         
816                                 expectedDistance[count] = exp;
817                                 count++;
818                                 gobble(in);
819                         }
820
821                         gobble(in);
822                         
823                         count = lines[i]->start;
824                         for (int m = 0; m < size; m++) {
825                                 float num;
826                                 in >> num;
827                                 
828                                 DE[count] = num;
829                                 count++;
830                         }
831                         
832                         gobble(in);
833                         
834                         count = lines[i]->start;
835                         for (int m = 0; m < size; m++) {
836                                 float num;
837                                 in >> num;
838                                 
839                                 deviation[count] = num;
840                                 count++;
841                         }
842
843                         in.close();
844                         remove(s.c_str());
845                 }
846
847                                 
848 #else
849                         mothurOut("Calculating observed distance... "); cout.flush();
850                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
851                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windows[i], windowSizes[i]);
852                                 obsDistance[i] = obsi;
853                         }
854                         mothurOut("Done."); mothurOutEndLine();
855                         
856                         
857                         
858                         mothurOut("Finding variability... "); cout.flush();
859                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
860                                 vector<float> q = decalc->findQav(windows[i], windowSizes[i], probabilityProfile, h[i]);
861                                 Qav[i] = q;
862                         }
863                         mothurOut("Done."); mothurOutEndLine();
864                         
865                         
866                         
867                         mothurOut("Calculating alpha... "); cout.flush();
868                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
869                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
870                                 seqCoef.push_back(alpha);
871                         }
872                         mothurOut("Done."); mothurOutEndLine();
873                 
874                 
875                 
876                         mothurOut("Calculating expected distance... "); cout.flush();
877                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
878                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
879                                 expectedDistance[i] = exp;
880                         }
881                         mothurOut("Done."); mothurOutEndLine();
882                         
883                         
884                         
885                         mothurOut("Finding deviation... "); cout.flush();
886                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
887                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
888                                 DE[i] = de;
889                                 
890                                 it = trimmed[i].begin();
891                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
892                                 deviation[i] = dist;
893                         }
894                         mothurOut("Done."); mothurOutEndLine();
895
896 #endif          
897         }
898         catch(exception& e) {
899                 errorOut(e, "Pintail", "createProcesses");
900                 exit(1);
901         }
902 }
903
904
905 /**************************************************************************************************/
906
907 void Pintail::createProcessesQuan() {
908         try {
909 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
910                 int process = 0;
911                 vector<int> processIDS;
912                                 
913                 //loop through and create all the processes you want
914                 while (process != processors) {
915                         int pid = fork();
916                         
917                         if (pid > 0) {
918                                 processIDS.push_back(pid);  
919                                 process++;
920                         }else if (pid == 0){
921                                 
922                                 quantiles = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end);
923                                 
924                                 //write out data to file so parent can read it
925                                 ofstream out;
926                                 string s = toString(getpid()) + ".temp";
927                                 openOutputFile(s, out);
928                                 
929                                                                 
930                                 //output observed distances
931                                 for (int i = 0; i < quantiles.size(); i++) {
932                                         out << quantiles[i].size() << '\t';
933                                         for (int j = 0; j < quantiles[i].size(); j++) {
934                                                 out << quantiles[i][j] << '\t';
935                                         }
936                                         out << endl;
937                                 }
938                                 
939                                 out.close();
940                                 
941                                 exit(0);
942                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
943                 }
944                 
945                 //force parent to wait until all the processes are done
946                 for (int i=0;i<processors;i++) { 
947                         int temp = processIDS[i];
948                         wait(&temp);
949                 }
950                 
951                 //get data created by processes
952                 for (int i=0;i<processors;i++) { 
953                         ifstream in;
954                         string s = toString(processIDS[i]) + ".temp";
955                         openInputFile(s, in);
956                         
957                         vector< vector<float> > quan; quan.resize(100);
958                         
959                         //get quantiles
960                         for (int m = 0; m < quan.size(); m++) {
961                                 int num;
962                                 in >> num;
963                         
964                                 vector<float> q;  float w;
965                                 for (int j = 0; j < num; j++) {
966                                         in >> w;
967                                         q.push_back(w);
968                                 }
969                         
970                                 quan[m] = q;
971                                 gobble(in);
972                         }
973                         
974                         
975                         //save quan in quantiles
976                         for (int i = 0; i < quan.size(); i++) {
977                                 //put all values of q[i] into quan[i]
978                                 quantiles[i].insert(quantiles[i].begin(), quan[i].begin(), quan[i].end());
979                         }
980                                         
981                         in.close();
982                         remove(s.c_str());
983                 }
984                 
985 #else
986                 quantiles = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
987 #endif          
988         }
989         catch(exception& e) {
990                 errorOut(e, "Pintail", "createProcessesQuan");
991                 exit(1);
992         }
993 }
994
995
996 //***************************************************************************************************************
997
998