]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
added set.dir command and modified commands to redirect input and output, removed...
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13
14 //********************************************************************************************************************
15 //sorts lowest to highest
16 inline bool compareQuanMembers(quanMember left, quanMember right){
17         return (left.score < right.score);      
18
19 //***************************************************************************************************************
20
21 Pintail::Pintail(string filename, string temp, string o) {  fastafile = filename;  templateFile = temp; outputDir = o; }
22 //***************************************************************************************************************
23
24 Pintail::~Pintail() {
25         try {
26                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
27                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
28                 for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
29         }
30         catch(exception& e) {
31                 errorOut(e, "Pintail", "~Pintail");
32                 exit(1);
33         }
34 }       
35 //***************************************************************************************************************
36 void Pintail::print(ostream& out) {
37         try {
38                 
39                 mothurOutEndLine();
40                 
41                 for (int i = 0; i < querySeqs.size(); i++) {
42                         
43                         int index = ceil(deviation[i]);
44                                                 
45                         //is your DE value higher than the 95%
46                         string chimera;
47                         if (quantiles[index][4] == 0.0) {
48                                 chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index);
49                         }else {
50                                 if (DE[i] > quantiles[index][4])                {       chimera = "Yes";        }
51                                 else                                                                    {       chimera = "No";         }
52                         }
53                         
54                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
55                         if (chimera == "Yes") {
56                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
57                         }
58                         out << "Observed\t";
59                         
60                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
61                         out << endl;
62                         
63                         out << "Expected\t";
64                         
65                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
66                         out << endl;
67                         
68                 }
69         }
70         catch(exception& e) {
71                 errorOut(e, "Pintail", "print");
72                 exit(1);
73         }
74 }
75
76 //***************************************************************************************************************
77 int Pintail::getChimeras() {
78         try {
79                 
80                 //read in query sequences and subject sequences
81                 mothurOut("Reading sequences and template file... "); cout.flush();
82                 querySeqs = readSeqs(fastafile);
83                 templateSeqs = readSeqs(templateFile);
84                 mothurOut("Done."); mothurOutEndLine();
85                 
86                 int numSeqs = querySeqs.size();
87                 
88                 if (unaligned) { mothurOut("Your sequences need to be aligned when you use the pintail method."); mothurOutEndLine(); return 1;  }
89                 
90                 obsDistance.resize(numSeqs);
91                 expectedDistance.resize(numSeqs);
92                 seqCoef.resize(numSeqs);
93                 DE.resize(numSeqs);
94                 Qav.resize(numSeqs);
95                 bestfit.resize(numSeqs);
96                 deviation.resize(numSeqs);
97                 trimmed.resize(numSeqs);
98                 windowSizes.resize(numSeqs, window);
99                 windowSizesTemplate.resize(templateSeqs.size(), window);
100                 windowsForeachQuery.resize(numSeqs);
101                 h.resize(numSeqs);
102                 quantiles.resize(100);  //one for every percent mismatch
103                 quantilesMembers.resize(100);  //one for every percent mismatch
104                 
105                 //break up file if needed
106                 int linesPerProcess = numSeqs / processors ;
107                 
108                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
109                         //find breakup of sequences for all times we will Parallelize
110                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
111                         else {
112                                 //fill line pairs
113                                 for (int i = 0; i < (processors-1); i++) {                      
114                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
115                                 }
116                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
117                                 int i = processors - 1;
118                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
119                         }
120                         
121                         //find breakup of templatefile for quantiles
122                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
123                         else { 
124                                 for (int i = 0; i < processors; i++) {
125                                         templateLines.push_back(new linePair());
126                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
127                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
128                                 }
129                         }
130                 #else
131                         lines.push_back(new linePair(0, numSeqs));
132                         templateLines.push_back(new linePair(0, templateSeqs.size()));
133                 #endif
134                 
135                 distcalculator = new eachGapDist();
136                 decalc = new DeCalculator();
137                 
138                 //if the user does enter a mask then you want to keep all the spots in the alignment
139                 if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
140                 else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
141                 
142                 decalc->setMask(seqMask);
143                 
144                 //find pairs
145                 if (processors == 1) { 
146                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
147                         bestfit = findPairs(lines[0]->start, lines[0]->end);
148                         mothurOut("Done."); mothurOutEndLine();
149                 }else {         createProcessesPairs();         }
150                 
151 //string o = "closestmatch.eachgap.fasta";
152 //ofstream out7;
153 //openOutputFile(o, out7);
154
155 //for (int i = 0; i < bestfit.size(); i++) {
156         //out7 << ">" << querySeqs[i]->getName() << "-"<< bestfit[i]->getName() << endl;
157         //out7 << bestfit[i]->getAligned() << endl;
158 //}             
159 //out7.close(); 
160                 //find P
161                 mothurOut("Getting conservation... "); cout.flush();
162                 if (consfile == "") { 
163                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
164                         probabilityProfile = decalc->calcFreq(templateSeqs, outputDir + getSimpleName(templateFile)); 
165                         mothurOut("Done."); mothurOutEndLine();
166                 }else                           {   probabilityProfile = readFreq();                      }
167
168                 //make P into Q
169                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
170                 mothurOut("Done."); mothurOutEndLine();
171                 
172                 //mask sequences if the user wants to 
173                 if (seqMask != "") {
174                         //mask querys
175                         for (int i = 0; i < querySeqs.size(); i++) {
176                                 decalc->runMask(querySeqs[i]);
177                         }
178                 
179                         //mask templates
180                         for (int i = 0; i < templateSeqs.size(); i++) {
181                                 decalc->runMask(templateSeqs[i]);
182                         }
183                         
184                         for (int i = 0; i < bestfit.size(); i++) { 
185                                 decalc->runMask(bestfit[i]);
186                         }
187
188                 }
189                 
190                 if (filter) {
191                         vector<Sequence*> temp = templateSeqs;
192                         for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]);  }
193                         
194                         createFilter(temp);
195                         
196                         runFilter(querySeqs);
197                         runFilter(templateSeqs);
198                         runFilter(bestfit);
199                 }
200                                                                                 
201                                                                                                                                                 
202                 if (processors == 1) { 
203         
204                         for (int j = 0; j < bestfit.size(); j++) { 
205                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
206                         }
207                         
208                         mothurOut("Finding window breaks... "); cout.flush();
209                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
210                                 it = trimmed[i].begin();
211                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
212                                 windowsForeachQuery[i] = win;
213                         }
214                         mothurOut("Done."); mothurOutEndLine();
215                 
216                 }else {         createProcessesSpots();         }
217                 
218                 if (processors == 1) { 
219                                                 
220                         mothurOut("Calculating observed distance... "); cout.flush();
221                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
222                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
223         
224                                 obsDistance[i] = obsi;
225                         }
226                         mothurOut("Done."); mothurOutEndLine();
227                         
228                         
229                         mothurOut("Finding variability... "); cout.flush();
230                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
231                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
232
233                                 Qav[i] = q;
234                         }
235                         mothurOut("Done."); mothurOutEndLine();
236                         
237                         
238                         mothurOut("Calculating alpha... "); cout.flush();
239                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
240                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
241                                 seqCoef[i] = alpha;
242                         }
243                         mothurOut("Done."); mothurOutEndLine();
244                 
245                 
246                         mothurOut("Calculating expected distance... "); cout.flush();
247                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
248                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
249                                 expectedDistance[i] = exp;
250                         }
251                         mothurOut("Done."); mothurOutEndLine();
252                         
253                         
254                         mothurOut("Finding deviation... "); cout.flush();
255                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
256                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
257                                 DE[i] = de;
258                         
259                                 it = trimmed[i].begin();
260                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
261                                 deviation[i] = dist;
262                         }
263                         mothurOut("Done."); mothurOutEndLine();
264                         
265                 } 
266                 else {          createProcesses();              }
267                 
268                 //quantiles are used to determine whether the de values found indicate a chimera
269                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
270                 //combination of sequences in the template
271                 if (quanfile != "") {  quantiles = readQuantiles();  }
272                 else {
273                         
274                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
275                         if (processors == 1) { 
276                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
277                         }else {         createProcessesQuan();          }
278                 
279                         
280                         ofstream out4, out5;
281                         string noOutliers, outliers;
282                         
283                         if ((!filter) && (seqMask == "")) {
284                                 noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.quan";
285                         }else if ((filter) && (seqMask == "")) { 
286                                 noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.filtered.quan";
287                         }else if ((!filter) && (seqMask != "")) { 
288                                 noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.masked.quan";
289                         }else if ((filter) && (seqMask != "")) { 
290                                 noOutliers = outputDir + getRootName(getSimpleName(templateFile)) + "pintail.filtered.masked.quan";
291                         }
292
293                         //outliers = getRootName(templateFile) + "pintail.quanYESOUTLIERS";
294                         
295                         /*openOutputFile(outliers, out4);
296                         
297                         //adjust quantiles
298                         for (int i = 0; i < quantilesMembers.size(); i++) {
299                                 vector<float> temp;
300                                 
301                                 if (quantilesMembers[i].size() == 0) {
302                                         //in case this is not a distance found in your template files
303                                         for (int g = 0; g < 6; g++) {
304                                                 temp.push_back(0.0);
305                                         }
306                                 }else{
307                                         
308                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
309                                         
310                                         //save 10%
311                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
312                                         //save 25%
313                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
314                                         //save 50%
315                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
316                                         //save 75%
317                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
318                                         //save 95%
319                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
320                                         //save 99%
321                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
322                                         
323                                 }
324                                 
325                                 //output quan value
326                                 out4 << i+1 << '\t';                            
327                                 for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
328                                 out4 << endl;
329                                 
330                                 quantiles[i] = temp;
331                                 
332                         }
333                         
334                         out4.close();*/
335                         
336                         decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
337                         
338                         openOutputFile(noOutliers, out5);
339                         
340                         //adjust quantiles
341                         for (int i = 0; i < quantilesMembers.size(); i++) {
342                                 vector<float> temp;
343                                 
344                                 if (quantilesMembers[i].size() == 0) {
345                                         //in case this is not a distance found in your template files
346                                         for (int g = 0; g < 6; g++) {
347                                                 temp.push_back(0.0);
348                                         }
349                                 }else{
350                                         
351                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
352                                         
353                                         //save 10%
354                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
355                                         //save 25%
356                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
357                                         //save 50%
358                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
359                                         //save 75%
360                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
361                                         //save 95%
362                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
363                                         //save 99%
364                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
365                                         
366                                 }
367                                 
368                                 //output quan value
369                                 out5 << i+1 << '\t';                            
370                                 for (int u = 0; u < temp.size(); u++) {   out5 << temp[u] << '\t'; }
371                                 out5 << endl;
372                                 
373                                 quantiles[i] = temp;
374                                 
375                         }
376
377                         mothurOut("Done."); mothurOutEndLine();
378                         
379                 }
380                         
381                 //free memory
382                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
383                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
384                         
385                 delete distcalculator;
386                 delete decalc;
387                 
388                 return 0;
389         }
390         catch(exception& e) {
391                 errorOut(e, "Pintail", "getChimeras");
392                 exit(1);
393         }
394 }
395
396 //***************************************************************************************************************
397
398 vector<float> Pintail::readFreq() {
399         try {
400         
401                 ifstream in;
402                 openInputFile(consfile, in);
403                 
404                 vector<float> prob;
405                 set<int> h = decalc->getPos();  //positions of bases in masking sequence
406                 
407                 //read in probabilities and store in vector
408                 int pos; float num; 
409                 
410                 while(!in.eof()){
411                         
412                         in >> pos >> num;
413                         
414                         if (h.count(pos) > 0) {
415                                 float Pi;
416                                 Pi =  (num - 0.25) / 0.75; 
417                         
418                                 //cannot have probability less than 0.
419                                 if (Pi < 0) { Pi = 0.0; }
420
421                                 //do you want this spot
422                                 prob.push_back(Pi);  
423                         }
424                         
425                         gobble(in);
426                 }
427                 
428                 in.close();
429                 return prob;
430                 
431         }
432         catch(exception& e) {
433                 errorOut(e, "Pintail", "readFreq");
434                 exit(1);
435         }
436 }
437
438 //***************************************************************************************************************
439 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
440 vector<Sequence*> Pintail::findPairs(int start, int end) {
441         try {
442                 
443                 vector<Sequence*> seqsMatches;  
444                 
445                 for(int i = start; i < end; i++){
446                         
447                         vector<Sequence*> copy = decalc->findClosest(querySeqs[i], templateSeqs, 1);
448                         seqsMatches.push_back(copy[0]);
449                 }
450                 
451                 return seqsMatches;
452         
453         }
454         catch(exception& e) {
455                 errorOut(e, "Pintail", "findPairs");
456                 exit(1);
457         }
458 }
459
460 /**************************************************************************************************/
461
462 void Pintail::createProcessesSpots() {
463         try {
464 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
465                 int process = 0;
466                 vector<int> processIDS;
467                 
468                 //loop through and create all the processes you want
469                 while (process != processors) {
470                         int pid = fork();
471                         
472                         if (pid > 0) {
473                                 processIDS.push_back(pid);  
474                                 process++;
475                         }else if (pid == 0){
476                                 
477                                 for (int j = lines[process]->start; j < lines[process]->end; j++) {
478                                 
479                                         //chops off beginning and end of sequences so they both start and end with a base
480                                         map<int, int> trim;
481
482                                         decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
483                                         trimmed[j] = trim;
484                                         
485                                 }
486
487                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
488                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
489                                         it = trimmed[i].begin();
490                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
491                                 }
492                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
493                                 
494                                 //write out data to file so parent can read it
495                                 ofstream out;
496                                 string s = toString(getpid()) + ".temp";
497                                 openOutputFile(s, out);
498                                 
499                                 //output windowsForeachQuery
500                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
501                                         out << windowsForeachQuery[i].size() << '\t';
502                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
503                                                 out << windowsForeachQuery[i][j] << '\t';
504                                         }
505                                         out << endl;
506                                 }
507                         
508                                 //output windowSizes
509                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
510                                         out << windowSizes[i] << '\t';
511                                 }
512                                 out << endl;
513                                 
514                                 //output trimmed values
515                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
516                                         it = trimmed[i].begin();                                        
517                                         out << it->first << '\t' << it->second << endl;
518                                 }
519                                 out.close();
520                                 
521                                 exit(0);
522                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
523                 }
524                 
525                 //force parent to wait until all the processes are done
526                 for (int i=0;i<processors;i++) { 
527                         int temp = processIDS[i];
528                         wait(&temp);
529                 }
530                 
531                 //get data created by processes
532                 for (int i=0;i<processors;i++) { 
533                         ifstream in;
534                         string s = toString(processIDS[i]) + ".temp";
535                         openInputFile(s, in);
536                         
537                         int size = lines[i]->end - lines[i]->start;
538                                         
539                         int count = lines[i]->start;
540                         for (int m = 0; m < size; m++) {
541                                 int num;
542                                 in >> num;
543                         
544                                 vector<int> win;  int w;
545                                 for (int j = 0; j < num; j++) {
546                                         in >> w;
547                                         win.push_back(w);
548                                 }
549                         
550                                 windowsForeachQuery[count] = win;
551                                 count++;
552                                 gobble(in);
553                         }
554                 
555                         gobble(in);
556                         count = lines[i]->start;
557                         for (int m = 0; m < size; m++) {
558                                 int num;
559                                 in >> num;
560                                 
561                                 windowSizes[count] = num;
562                                 count++;
563                         }
564                         
565                         gobble(in);
566                         
567                         count = lines[i]->start;
568                         for (int m = 0; m < size; m++) {
569                                 int front, back;
570                                 in >> front >> back;
571                         
572                                 map<int, int> t;
573                                 
574                                 t[front] = back;
575                                 
576                                 trimmed[count] = t;
577                                 count++;
578                                 
579                                 gobble(in);
580                         }
581
582                         
583                         in.close();
584                         remove(s.c_str());
585                 }
586                         
587         
588 #else
589                 for (int j = 0; j < bestfit.size(); j++) {
590                         //chops off beginning and end of sequences so they both start and end with a base
591                         decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
592                 }
593
594                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
595                                 it = trimmed[i].begin();
596                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
597                                 windowsForeachQuery[i] = win;
598                 }
599
600 #endif          
601         }
602         catch(exception& e) {
603                 errorOut(e, "Pintail", "createProcessesSpots");
604                 exit(1);
605         }
606 }
607 /**************************************************************************************************/
608
609 void Pintail::createProcessesPairs() {
610         try {
611 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
612                 int process = 0;
613                 vector<int> processIDS;
614                 
615                 //loop through and create all the processes you want
616                 while (process != processors) {
617                         int pid = fork();
618                         
619                         if (pid > 0) {
620                                 processIDS.push_back(pid);  
621                                 process++;
622                         }else if (pid == 0){
623                                 
624                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
625                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
626                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
627                                 
628                                 //write out data to file so parent can read it
629                                 ofstream out;
630                                 string s = toString(getpid()) + ".temp";
631                                 openOutputFile(s, out);
632                                 
633                                 //output range and size
634                                 out << bestfit.size() << endl;
635                                 
636                                 //output pairs
637                                 for (int i = 0; i < bestfit.size(); i++) {
638                                         out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
639                                 }
640                                 out.close();
641                                 
642                                 exit(0);
643                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
644                 }
645                 
646                 //force parent to wait until all the processes are done
647                 for (int i=0;i<processors;i++) { 
648                         int temp = processIDS[i];
649                         wait(&temp);
650                 }
651                 
652                 //get data created by processes
653                 for (int i=0;i<processors;i++) { 
654                         ifstream in;
655                         string s = toString(processIDS[i]) + ".temp";
656                         openInputFile(s, in);
657                         
658                         int size;
659                         in >> size;  gobble(in);
660                                 
661                         //get pairs
662                         int count = lines[i]->start;
663                         for (int m = 0; m < size; m++) {
664                                 Sequence* temp = new Sequence(in);
665                                 bestfit[count] = temp;
666                         
667                                 count++;
668                                 gobble(in);
669                         }
670                         
671                         in.close();
672                         remove(s.c_str());
673                 }
674                         
675         
676 #else
677                 bestfit = findPairs(lines[0]->start, lines[0]->end);
678 #endif          
679         }
680         catch(exception& e) {
681                 errorOut(e, "Pintail", "createProcessesPairs");
682                 exit(1);
683         }
684 }
685 /**************************************************************************************************/
686
687 void Pintail::createProcesses() {
688         try {
689 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
690                 int process = 0;
691                 vector<int> processIDS;
692                 
693                 //loop through and create all the processes you want
694                 while (process != processors) {
695                         int pid = fork();
696                         
697                         if (pid > 0) {
698                                 processIDS.push_back(pid);  
699                                 process++;
700                         }else if (pid == 0){
701                                 
702                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
703                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
704                                         
705                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
706                                         obsDistance[i] = obsi;
707                                 
708                                         //calc Qav
709                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
710                                         
711                                         //get alpha
712                                         float alpha = decalc->getCoef(obsDistance[i], q);
713                                         
714                                         //find expected
715                                         vector<float> exp = decalc->calcExpected(q, alpha);
716                                         expectedDistance[i] = exp;
717                                         
718                                         //get de and deviation
719                                         float dei = decalc->calcDE(obsi, exp);
720                                         DE[i] = dei;
721                                         
722                                         it = trimmed[i].begin();
723                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
724                                         deviation[i] = dist;
725                                 }
726                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
727                                 
728                                 //write out data to file so parent can read it
729                                 ofstream out;
730                                 string s = toString(getpid()) + ".temp";
731                                 openOutputFile(s, out);
732                                 
733                                 int size = lines[process]->end - lines[process]->start;
734                                 out << size << endl;
735                                                                 
736                                 //output observed distances
737                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
738                                         out << obsDistance[i].size() << '\t';
739                                         for (int j = 0; j < obsDistance[i].size(); j++) {
740                                                 out << obsDistance[i][j] << '\t';
741                                         }
742                                         out << endl;
743                                 }
744                                 
745                                 
746                                 //output expected distances
747                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
748                                         out << expectedDistance[i].size() << '\t';
749                                         for (int j = 0; j < expectedDistance[i].size(); j++) {
750                                                 out << expectedDistance[i][j] << '\t';
751                                         }
752                                         out << endl;
753                                 }
754
755                         
756                                 //output de values
757                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
758                                         out << DE[i] << '\t';
759                                 }
760                                 out << endl;    
761                                 
762                                 //output de values
763                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
764                                         out << deviation[i] << '\t';
765                                 }
766                                 out << endl;
767                                 
768                                 out.close();
769
770                                 exit(0);
771                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
772                 }
773                 
774                 //force parent to wait until all the processes are done
775                 for (int i=0;i<processors;i++) { 
776                         int temp = processIDS[i];
777                         wait(&temp);
778                 }
779                 
780                 //get data created by processes
781                 for (int i=0;i<processors;i++) { 
782                         ifstream in;
783                         string s = toString(processIDS[i]) + ".temp";
784                         openInputFile(s, in);
785                         
786                         int size;
787                         in >> size;  gobble(in);
788                         
789                         //get observed distances
790                         int count = lines[i]->start;
791                         for (int m = 0; m < size; m++) {
792                                 int num;
793                                 in >> num;
794                         
795                                 vector<float> obs;  float w;
796                                 for (int j = 0; j < num; j++) {
797                                         in >> w;
798                                         obs.push_back(w);
799                                 }
800                         
801                                 obsDistance[count] = obs;
802                                 count++;
803                                 gobble(in);
804                         }
805                         
806                         gobble(in);
807                         
808                         //get expected distances
809                         count = lines[i]->start;
810                         for (int m = 0; m < size; m++) {
811                                 int num;
812                                 in >> num;
813                         
814                                 vector<float> exp;  float w;
815                                 for (int j = 0; j < num; j++) {
816                                         in >> w;
817                                         exp.push_back(w);
818                                 }
819                         
820                                 expectedDistance[count] = exp;
821                                 count++;
822                                 gobble(in);
823                         }
824
825                         gobble(in);
826                         
827                         count = lines[i]->start;
828                         for (int m = 0; m < size; m++) {
829                                 float num;
830                                 in >> num;
831                                 
832                                 DE[count] = num;
833                                 count++;
834                         }
835                         
836                         gobble(in);
837                         
838                         count = lines[i]->start;
839                         for (int m = 0; m < size; m++) {
840                                 float num;
841                                 in >> num;
842                                 
843                                 deviation[count] = num;
844                                 count++;
845                         }
846
847                         in.close();
848                         remove(s.c_str());
849                 }
850
851                                 
852 #else
853                         mothurOut("Calculating observed distance... "); cout.flush();
854                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
855                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
856                                 obsDistance[i] = obsi;
857                         }
858                         mothurOut("Done."); mothurOutEndLine();
859                         
860                         
861                         
862                         mothurOut("Finding variability... "); cout.flush();
863                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
864                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
865                                 Qav[i] = q;
866                         }
867                         mothurOut("Done."); mothurOutEndLine();
868                         
869                         
870                         
871                         mothurOut("Calculating alpha... "); cout.flush();
872                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
873                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
874                                 seqCoef.push_back(alpha);
875                         }
876                         mothurOut("Done."); mothurOutEndLine();
877                 
878                 
879                 
880                         mothurOut("Calculating expected distance... "); cout.flush();
881                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
882                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
883                                 expectedDistance[i] = exp;
884                         }
885                         mothurOut("Done."); mothurOutEndLine();
886                         
887                         
888                         
889                         mothurOut("Finding deviation... "); cout.flush();
890                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
891                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
892                                 DE[i] = de;
893                                 
894                                 it = trimmed[i].begin();
895                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
896                                 deviation[i] = dist;
897                         }
898                         mothurOut("Done."); mothurOutEndLine();
899
900 #endif          
901         }
902         catch(exception& e) {
903                 errorOut(e, "Pintail", "createProcesses");
904                 exit(1);
905         }
906 }
907
908
909 /**************************************************************************************************/
910
911 void Pintail::createProcessesQuan() {
912         try {
913 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
914                 int process = 0;
915                 vector<int> processIDS;
916                                 
917                 //loop through and create all the processes you want
918                 while (process != processors) {
919                         int pid = fork();
920                         
921                         if (pid > 0) {
922                                 processIDS.push_back(pid);  
923                                 process++;
924                         }else if (pid == 0){
925                                 
926                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end);
927                                 
928                                 //write out data to file so parent can read it
929                                 ofstream out;
930                                 string s = toString(getpid()) + ".temp";
931                                 openOutputFile(s, out);
932                                 
933                                                                 
934                                 //output observed distances
935                                 for (int i = 0; i < quantilesMembers.size(); i++) {
936                                         out << quantilesMembers[i].size() << '\t';
937                                         for (int j = 0; j < quantilesMembers[i].size(); j++) {
938                                                 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
939                                         }
940                                         out << endl;
941                                 }
942                                 
943                                 out.close();
944                                 
945                                 exit(0);
946                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
947                 }
948                 
949                 //force parent to wait until all the processes are done
950                 for (int i=0;i<processors;i++) { 
951                         int temp = processIDS[i];
952                         wait(&temp);
953                 }
954
955                 //get data created by processes
956                 for (int i=0;i<processors;i++) { 
957                         ifstream in;
958                         string s = toString(processIDS[i]) + ".temp";
959                         openInputFile(s, in);
960                         
961                         vector< vector<quanMember> > quan; 
962                         quan.resize(100);
963                         
964                         //get quantiles
965                         for (int m = 0; m < quan.size(); m++) {
966                                 int num;
967                                 in >> num; 
968                                 
969                                 gobble(in);
970
971                                 vector<quanMember> q;  float w; int b, n;
972                                 for (int j = 0; j < num; j++) {
973                                         in >> w >> b >> n;
974         //cout << w << '\t' << b << '\t' n << endl;
975                                         quanMember newMember(w, b, n);
976                                         q.push_back(newMember);
977                                 }
978 //cout << "here" << endl;
979                                 quan[m] = q;
980 //cout << "now here" << endl;
981                                 gobble(in);
982                         }
983                         
984         
985                         //save quan in quantiles
986                         for (int j = 0; j < quan.size(); j++) {
987                                 //put all values of q[i] into quan[i]
988                                 for (int l = 0; l < quan[j].size(); l++) {  quantilesMembers[j].push_back(quan[j][l]);   }
989                                 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
990                         }
991                                         
992                         in.close();
993                         remove(s.c_str());
994                 }
995                 
996 #else
997                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
998 #endif          
999         }
1000         catch(exception& e) {
1001                 errorOut(e, "Pintail", "createProcessesQuan");
1002                 exit(1);
1003         }
1004 }
1005
1006
1007 //***************************************************************************************************************
1008
1009