]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
checking in chimera files in progress after move to michigan
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13
14 //********************************************************************************************************************
15 //sorts lowest to highest
16 inline bool compareQuanMembers(quanMember left, quanMember right){
17         return (left.score < right.score);      
18
19 //***************************************************************************************************************
20
21 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
22 //***************************************************************************************************************
23
24 Pintail::~Pintail() {
25         try {
26                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
27                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
28                 for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
29         }
30         catch(exception& e) {
31                 errorOut(e, "Pintail", "~Pintail");
32                 exit(1);
33         }
34 }       
35 //***************************************************************************************************************
36 void Pintail::print(ostream& out) {
37         try {
38                 
39                 mothurOutEndLine();
40                 
41                 for (int i = 0; i < querySeqs.size(); i++) {
42                         
43                         int index = ceil(deviation[i]);
44                         
45                         if (index == 0) { index=1;  }
46                                                 
47                         //is your DE value higher than the 95%
48                         string chimera;
49                         if (DE[i] > quantiles[index-1][4])              {       chimera = "Yes";        }
50                         else                                                                    {       chimera = "No";         }
51                         
52                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
53                         if (chimera == "Yes") {
54                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
55                         }
56                         out << "Observed\t";
57                         
58                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
59                         out << endl;
60                         
61                         out << "Expected\t";
62                         
63                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
64                         out << endl;
65                         
66                 }
67         }
68         catch(exception& e) {
69                 errorOut(e, "Pintail", "print");
70                 exit(1);
71         }
72 }
73
74 //***************************************************************************************************************
75 void Pintail::getChimeras() {
76         try {
77                 
78                 //read in query sequences and subject sequences
79                 mothurOut("Reading sequences and template file... "); cout.flush();
80                 querySeqs = readSeqs(fastafile);
81                 templateSeqs = readSeqs(templateFile);
82                 mothurOut("Done."); mothurOutEndLine();
83                 
84                 int numSeqs = querySeqs.size();
85                 
86                 obsDistance.resize(numSeqs);
87                 expectedDistance.resize(numSeqs);
88                 seqCoef.resize(numSeqs);
89                 DE.resize(numSeqs);
90                 Qav.resize(numSeqs);
91                 bestfit.resize(numSeqs);
92                 deviation.resize(numSeqs);
93                 trimmed.resize(numSeqs);
94                 windowSizes.resize(numSeqs, window);
95                 windowSizesTemplate.resize(templateSeqs.size(), window);
96                 windowsForeachQuery.resize(numSeqs);
97                 h.resize(numSeqs);
98                 quantiles.resize(100);  //one for every percent mismatch
99                 quantilesMembers.resize(100);  //one for every percent mismatch
100                 
101                 //break up file if needed
102                 int linesPerProcess = numSeqs / processors ;
103                 
104                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
105                         //find breakup of sequences for all times we will Parallelize
106                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
107                         else {
108                                 //fill line pairs
109                                 for (int i = 0; i < (processors-1); i++) {                      
110                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
111                                 }
112                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
113                                 int i = processors - 1;
114                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
115                         }
116                         
117                         //find breakup of templatefile for quantiles
118                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
119                         else { 
120                                 for (int i = 0; i < processors; i++) {
121                                         templateLines.push_back(new linePair());
122                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
123                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
124                                 }
125                         }
126                 #else
127                         lines.push_back(new linePair(0, numSeqs));
128                         templateLines.push_back(new linePair(0, templateSeqs.size()));
129                 #endif
130                 
131                 distcalculator = new eachGapDist();
132                 decalc = new DeCalculator();
133                 
134                 //if the user does enter a mask then you want to keep all the spots in the alignment
135                 if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
136                 else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
137                 
138                 decalc->setMask(seqMask);
139                 
140                 //find pairs
141                 if (processors == 1) { 
142                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
143                         bestfit = findPairs(lines[0]->start, lines[0]->end);
144                         mothurOut("Done."); mothurOutEndLine();
145                 }else {         createProcessesPairs();         }
146                 
147 /*string o = "foronlinepintailpairs-eachgap";
148 ofstream out7;
149 openOutputFile(o, out7);
150
151 for (int i = 0; i < bestfit.size(); i++) {
152         out7 << querySeqs[i]->getName() << endl;
153         out7 << querySeqs[i]->getUnaligned() << endl << endl;
154         
155         out7 << bestfit[i]->getName() << endl;
156         out7 << bestfit[i]->getUnaligned() << endl << endl << endl;
157 }               
158 out7.close();/*/        
159                 //find P
160                 mothurOut("Getting conservation... "); cout.flush();
161                 if (consfile == "") { 
162                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
163                         probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); 
164                         mothurOut("Done."); mothurOutEndLine();
165                 }else                           {   probabilityProfile = readFreq();                      }
166
167                 //make P into Q
168                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
169                 mothurOut("Done."); mothurOutEndLine();
170                 
171                 //mask sequences if the user wants to 
172                 if (seqMask != "") {
173                         //mask querys
174                         for (int i = 0; i < querySeqs.size(); i++) {
175                                 decalc->runMask(querySeqs[i]);
176                         }
177                 
178                         //mask templates
179                         for (int i = 0; i < templateSeqs.size(); i++) {
180                                 decalc->runMask(templateSeqs[i]);
181                         }
182                         
183                         for (int i = 0; i < bestfit.size(); i++) { 
184                                 decalc->runMask(bestfit[i]);
185                         }
186
187                 }
188                 
189                 if (filter) {
190                         vector<Sequence*> temp = templateSeqs;
191                         for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]);  }
192                         
193                         createFilter(temp);
194                         
195                         runFilter(querySeqs);
196                         runFilter(templateSeqs);
197                         runFilter(bestfit);
198                 }
199                                                                                 
200                                                                                                                                                 
201                 if (processors == 1) { 
202         
203                         for (int j = 0; j < bestfit.size(); j++) { 
204                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
205                         }
206                         
207                         mothurOut("Finding window breaks... "); cout.flush();
208                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
209                                 it = trimmed[i].begin();
210                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
211                                 windowsForeachQuery[i] = win;
212                         }
213                         mothurOut("Done."); mothurOutEndLine();
214                 
215                 }else {         createProcessesSpots();         }
216                 
217                 if (processors == 1) { 
218                                                 
219                         mothurOut("Calculating observed distance... "); cout.flush();
220                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
221                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
222         
223                                 obsDistance[i] = obsi;
224                         }
225                         mothurOut("Done."); mothurOutEndLine();
226                         
227                         
228                         mothurOut("Finding variability... "); cout.flush();
229                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
230                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
231
232                                 Qav[i] = q;
233                         }
234                         mothurOut("Done."); mothurOutEndLine();
235                         
236                         
237                         mothurOut("Calculating alpha... "); cout.flush();
238                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
239                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
240                                 seqCoef[i] = alpha;
241                         }
242                         mothurOut("Done."); mothurOutEndLine();
243                 
244                 
245                         mothurOut("Calculating expected distance... "); cout.flush();
246                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
247                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
248                                 expectedDistance[i] = exp;
249                         }
250                         mothurOut("Done."); mothurOutEndLine();
251                         
252                         
253                         mothurOut("Finding deviation... "); cout.flush();
254                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
255                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
256                                 DE[i] = de;
257                         
258                                 it = trimmed[i].begin();
259                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
260                                 deviation[i] = dist;
261                         }
262                         mothurOut("Done."); mothurOutEndLine();
263                         
264                 } 
265                 else {          createProcesses();              }
266                 
267                 //quantiles are used to determine whether the de values found indicate a chimera
268                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
269                 //combination of sequences in the template
270                 if (quanfile != "") {  quantiles = readQuantiles();  }
271                 else {
272                         
273                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
274                         if (processors == 1) { 
275                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
276                         }else {         createProcessesQuan();          }
277                 
278                         
279                         ofstream out4, out5;
280                         string noOutliers, outliers;
281                         
282                         noOutliers = getRootName(templateFile) + "pintail.quanNOOUTLIERS";
283                         outliers = getRootName(templateFile) + "pintail.quanYESOUTLIERS";
284                         
285                         openOutputFile(outliers, out4);
286                         
287                         //adjust quantiles
288                         for (int i = 0; i < quantilesMembers.size(); i++) {
289                                 vector<float> temp;
290                                 
291                                 if (quantilesMembers[i].size() == 0) {
292                                         //in case this is not a distance found in your template files
293                                         for (int g = 0; g < 6; g++) {
294                                                 temp.push_back(0.0);
295                                         }
296                                 }else{
297                                         
298                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
299                                         
300                                         //save 10%
301                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
302                                         //save 25%
303                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
304                                         //save 50%
305                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
306                                         //save 75%
307                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
308                                         //save 95%
309                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
310                                         //save 99%
311                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
312                                         
313                                 }
314                                 
315                                 //output quan value
316                                 out4 << i+1 << '\t';                            
317                                 for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
318                                 out4 << endl;
319                                 
320                                 quantiles[i] = temp;
321                                 
322                         }
323                         
324                         out4.close();
325                         
326                         decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
327                         
328                         openOutputFile(noOutliers, out5);
329                         
330                         //adjust quantiles
331                         for (int i = 0; i < quantilesMembers.size(); i++) {
332                                 vector<float> temp;
333                                 
334                                 if (quantilesMembers[i].size() == 0) {
335                                         //in case this is not a distance found in your template files
336                                         for (int g = 0; g < 6; g++) {
337                                                 temp.push_back(0.0);
338                                         }
339                                 }else{
340                                         
341                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
342                                         
343                                         //save 10%
344                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
345                                         //save 25%
346                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
347                                         //save 50%
348                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
349                                         //save 75%
350                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
351                                         //save 95%
352                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
353                                         //save 99%
354                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
355                                         
356                                 }
357                                 
358                                 //output quan value
359                                 out5 << i+1 << '\t';                            
360                                 for (int u = 0; u < temp.size(); u++) {   out5 << temp[u] << '\t'; }
361                                 out5 << endl;
362                                 
363                                 quantiles[i] = temp;
364                                 
365                         }
366
367                         mothurOut("Done."); mothurOutEndLine();
368                         
369                 }
370                         
371                 //free memory
372                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
373                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
374                         
375                 delete distcalculator;
376                 delete decalc;
377         }
378         catch(exception& e) {
379                 errorOut(e, "Pintail", "getChimeras");
380                 exit(1);
381         }
382 }
383
384 //***************************************************************************************************************
385
386 vector<float> Pintail::readFreq() {
387         try {
388         
389                 ifstream in;
390                 openInputFile(consfile, in);
391                 
392                 vector<float> prob;
393                 set<int> h = decalc->getPos();  //positions of bases in masking sequence
394                 
395                 //read in probabilities and store in vector
396                 int pos; float num; 
397                 
398                 while(!in.eof()){
399                         
400                         in >> pos >> num;
401                         
402                         if (h.count(pos) > 0) {
403                                 float Pi;
404                                 Pi =  (num - 0.25) / 0.75; 
405                         
406                                 //cannot have probability less than 0.
407                                 if (Pi < 0) { Pi = 0.0; }
408
409                                 //do you want this spot
410                                 prob.push_back(Pi);  
411                         }
412                         
413                         gobble(in);
414                 }
415                 
416                 in.close();
417                 return prob;
418                 
419         }
420         catch(exception& e) {
421                 errorOut(e, "Pintail", "readFreq");
422                 exit(1);
423         }
424 }
425
426 //***************************************************************************************************************
427 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
428 vector<Sequence*> Pintail::findPairs(int start, int end) {
429         try {
430                 
431                 vector<Sequence*> seqsMatches;  
432                 
433                 for(int i = start; i < end; i++){
434                 
435                         float smallest = 10000.0;
436                         Sequence query = *(querySeqs[i]);
437                         Sequence* match;
438                         
439                         for(int j = 0; j < templateSeqs.size(); j++){
440                                 
441                                 Sequence temp = *(templateSeqs[j]);
442                                 
443                                 distcalculator->calcDist(query, temp);
444                                 float dist = distcalculator->getDist();
445                                 
446                                 if (dist < smallest) { 
447                                         match = templateSeqs[j];
448                                         smallest = dist;
449                                 }
450                         }
451                         
452                         //make copy so trimSeqs doesn't overtrim
453                         Sequence* copy = new Sequence(match->getName(), match->getAligned());
454                         
455                         seqsMatches.push_back(copy);
456                 }
457                 
458                 return seqsMatches;
459         
460         }
461         catch(exception& e) {
462                 errorOut(e, "Pintail", "findPairs");
463                 exit(1);
464         }
465 }
466
467 /**************************************************************************************************/
468
469 void Pintail::createProcessesSpots() {
470         try {
471 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
472                 int process = 0;
473                 vector<int> processIDS;
474                 
475                 //loop through and create all the processes you want
476                 while (process != processors) {
477                         int pid = fork();
478                         
479                         if (pid > 0) {
480                                 processIDS.push_back(pid);  
481                                 process++;
482                         }else if (pid == 0){
483                                 
484                                 for (int j = lines[process]->start; j < lines[process]->end; j++) {
485                                 
486                                         //chops off beginning and end of sequences so they both start and end with a base
487                                         map<int, int> trim;
488
489                                         decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
490                                         trimmed[j] = trim;
491                                         
492                                 }
493
494                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
495                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
496                                         it = trimmed[i].begin();
497                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
498                                 }
499                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
500                                 
501                                 //write out data to file so parent can read it
502                                 ofstream out;
503                                 string s = toString(getpid()) + ".temp";
504                                 openOutputFile(s, out);
505                                 
506                                 //output windowsForeachQuery
507                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
508                                         out << windowsForeachQuery[i].size() << '\t';
509                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
510                                                 out << windowsForeachQuery[i][j] << '\t';
511                                         }
512                                         out << endl;
513                                 }
514                         
515                                 //output windowSizes
516                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
517                                         out << windowSizes[i] << '\t';
518                                 }
519                                 out << endl;
520                                 
521                                 //output trimmed values
522                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
523                                         it = trimmed[i].begin();                                        
524                                         out << it->first << '\t' << it->second << endl;
525                                 }
526                                 out.close();
527                                 
528                                 exit(0);
529                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
530                 }
531                 
532                 //force parent to wait until all the processes are done
533                 for (int i=0;i<processors;i++) { 
534                         int temp = processIDS[i];
535                         wait(&temp);
536                 }
537                 
538                 //get data created by processes
539                 for (int i=0;i<processors;i++) { 
540                         ifstream in;
541                         string s = toString(processIDS[i]) + ".temp";
542                         openInputFile(s, in);
543                         
544                         int size = lines[i]->end - lines[i]->start;
545                                         
546                         int count = lines[i]->start;
547                         for (int m = 0; m < size; m++) {
548                                 int num;
549                                 in >> num;
550                         
551                                 vector<int> win;  int w;
552                                 for (int j = 0; j < num; j++) {
553                                         in >> w;
554                                         win.push_back(w);
555                                 }
556                         
557                                 windowsForeachQuery[count] = win;
558                                 count++;
559                                 gobble(in);
560                         }
561                 
562                         gobble(in);
563                         count = lines[i]->start;
564                         for (int m = 0; m < size; m++) {
565                                 int num;
566                                 in >> num;
567                                 
568                                 windowSizes[count] = num;
569                                 count++;
570                         }
571                         
572                         gobble(in);
573                         
574                         count = lines[i]->start;
575                         for (int m = 0; m < size; m++) {
576                                 int front, back;
577                                 in >> front >> back;
578                         
579                                 map<int, int> t;
580                                 
581                                 t[front] = back;
582                                 
583                                 trimmed[count] = t;
584                                 count++;
585                                 
586                                 gobble(in);
587                         }
588
589                         
590                         in.close();
591                         remove(s.c_str());
592                 }
593                         
594         
595 #else
596                 for (int j = 0; j < bestfit.size(); j++) {
597                         //chops off beginning and end of sequences so they both start and end with a base
598                         decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
599                 }
600
601                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
602                                 it = trimmed[i].begin();
603                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
604                                 windowsForeachQuery[i] = win;
605                 }
606
607 #endif          
608         }
609         catch(exception& e) {
610                 errorOut(e, "Pintail", "createProcessesSpots");
611                 exit(1);
612         }
613 }
614 /**************************************************************************************************/
615
616 void Pintail::createProcessesPairs() {
617         try {
618 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
619                 int process = 0;
620                 vector<int> processIDS;
621                 
622                 //loop through and create all the processes you want
623                 while (process != processors) {
624                         int pid = fork();
625                         
626                         if (pid > 0) {
627                                 processIDS.push_back(pid);  
628                                 process++;
629                         }else if (pid == 0){
630                                 
631                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
632                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
633                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
634                                 
635                                 //write out data to file so parent can read it
636                                 ofstream out;
637                                 string s = toString(getpid()) + ".temp";
638                                 openOutputFile(s, out);
639                                 
640                                 //output range and size
641                                 out << bestfit.size() << endl;
642                                 
643                                 //output pairs
644                                 for (int i = 0; i < bestfit.size(); i++) {
645                                         out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
646                                 }
647                                 out.close();
648                                 
649                                 exit(0);
650                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
651                 }
652                 
653                 //force parent to wait until all the processes are done
654                 for (int i=0;i<processors;i++) { 
655                         int temp = processIDS[i];
656                         wait(&temp);
657                 }
658                 
659                 //get data created by processes
660                 for (int i=0;i<processors;i++) { 
661                         ifstream in;
662                         string s = toString(processIDS[i]) + ".temp";
663                         openInputFile(s, in);
664                         
665                         int size;
666                         in >> size;  gobble(in);
667                                 
668                         //get pairs
669                         int count = lines[i]->start;
670                         for (int m = 0; m < size; m++) {
671                                 Sequence* temp = new Sequence(in);
672                                 bestfit[count] = temp;
673                         
674                                 count++;
675                                 gobble(in);
676                         }
677                         
678                         in.close();
679                         remove(s.c_str());
680                 }
681                         
682         
683 #else
684                 bestfit = findPairs(lines[0]->start, lines[0]->end);
685 #endif          
686         }
687         catch(exception& e) {
688                 errorOut(e, "Pintail", "createProcessesPairs");
689                 exit(1);
690         }
691 }
692 /**************************************************************************************************/
693
694 void Pintail::createProcesses() {
695         try {
696 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
697                 int process = 0;
698                 vector<int> processIDS;
699                 
700                 //loop through and create all the processes you want
701                 while (process != processors) {
702                         int pid = fork();
703                         
704                         if (pid > 0) {
705                                 processIDS.push_back(pid);  
706                                 process++;
707                         }else if (pid == 0){
708                                 
709                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
710                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
711                                         
712                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
713                                         obsDistance[i] = obsi;
714                                 
715                                         //calc Qav
716                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
717                                         
718                                         //get alpha
719                                         float alpha = decalc->getCoef(obsDistance[i], q);
720                                         
721                                         //find expected
722                                         vector<float> exp = decalc->calcExpected(q, alpha);
723                                         expectedDistance[i] = exp;
724                                         
725                                         //get de and deviation
726                                         float dei = decalc->calcDE(obsi, exp);
727                                         DE[i] = dei;
728                                         
729                                         it = trimmed[i].begin();
730                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
731                                         deviation[i] = dist;
732                                 }
733                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
734                                 
735                                 //write out data to file so parent can read it
736                                 ofstream out;
737                                 string s = toString(getpid()) + ".temp";
738                                 openOutputFile(s, out);
739                                 
740                                 int size = lines[process]->end - lines[process]->start;
741                                 out << size << endl;
742                                                                 
743                                 //output observed distances
744                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
745                                         out << obsDistance[i].size() << '\t';
746                                         for (int j = 0; j < obsDistance[i].size(); j++) {
747                                                 out << obsDistance[i][j] << '\t';
748                                         }
749                                         out << endl;
750                                 }
751                                 
752                                 
753                                 //output expected distances
754                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
755                                         out << expectedDistance[i].size() << '\t';
756                                         for (int j = 0; j < expectedDistance[i].size(); j++) {
757                                                 out << expectedDistance[i][j] << '\t';
758                                         }
759                                         out << endl;
760                                 }
761
762                         
763                                 //output de values
764                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
765                                         out << DE[i] << '\t';
766                                 }
767                                 out << endl;    
768                                 
769                                 //output de values
770                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
771                                         out << deviation[i] << '\t';
772                                 }
773                                 out << endl;
774                                 
775                                 out.close();
776
777                                 exit(0);
778                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
779                 }
780                 
781                 //force parent to wait until all the processes are done
782                 for (int i=0;i<processors;i++) { 
783                         int temp = processIDS[i];
784                         wait(&temp);
785                 }
786                 
787                 //get data created by processes
788                 for (int i=0;i<processors;i++) { 
789                         ifstream in;
790                         string s = toString(processIDS[i]) + ".temp";
791                         openInputFile(s, in);
792                         
793                         int size;
794                         in >> size;  gobble(in);
795                         
796                         //get observed distances
797                         int count = lines[i]->start;
798                         for (int m = 0; m < size; m++) {
799                                 int num;
800                                 in >> num;
801                         
802                                 vector<float> obs;  float w;
803                                 for (int j = 0; j < num; j++) {
804                                         in >> w;
805                                         obs.push_back(w);
806                                 }
807                         
808                                 obsDistance[count] = obs;
809                                 count++;
810                                 gobble(in);
811                         }
812                         
813                         gobble(in);
814                         
815                         //get expected distances
816                         count = lines[i]->start;
817                         for (int m = 0; m < size; m++) {
818                                 int num;
819                                 in >> num;
820                         
821                                 vector<float> exp;  float w;
822                                 for (int j = 0; j < num; j++) {
823                                         in >> w;
824                                         exp.push_back(w);
825                                 }
826                         
827                                 expectedDistance[count] = exp;
828                                 count++;
829                                 gobble(in);
830                         }
831
832                         gobble(in);
833                         
834                         count = lines[i]->start;
835                         for (int m = 0; m < size; m++) {
836                                 float num;
837                                 in >> num;
838                                 
839                                 DE[count] = num;
840                                 count++;
841                         }
842                         
843                         gobble(in);
844                         
845                         count = lines[i]->start;
846                         for (int m = 0; m < size; m++) {
847                                 float num;
848                                 in >> num;
849                                 
850                                 deviation[count] = num;
851                                 count++;
852                         }
853
854                         in.close();
855                         remove(s.c_str());
856                 }
857
858                                 
859 #else
860                         mothurOut("Calculating observed distance... "); cout.flush();
861                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
862                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
863                                 obsDistance[i] = obsi;
864                         }
865                         mothurOut("Done."); mothurOutEndLine();
866                         
867                         
868                         
869                         mothurOut("Finding variability... "); cout.flush();
870                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
871                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
872                                 Qav[i] = q;
873                         }
874                         mothurOut("Done."); mothurOutEndLine();
875                         
876                         
877                         
878                         mothurOut("Calculating alpha... "); cout.flush();
879                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
880                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
881                                 seqCoef.push_back(alpha);
882                         }
883                         mothurOut("Done."); mothurOutEndLine();
884                 
885                 
886                 
887                         mothurOut("Calculating expected distance... "); cout.flush();
888                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
889                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
890                                 expectedDistance[i] = exp;
891                         }
892                         mothurOut("Done."); mothurOutEndLine();
893                         
894                         
895                         
896                         mothurOut("Finding deviation... "); cout.flush();
897                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
898                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
899                                 DE[i] = de;
900                                 
901                                 it = trimmed[i].begin();
902                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
903                                 deviation[i] = dist;
904                         }
905                         mothurOut("Done."); mothurOutEndLine();
906
907 #endif          
908         }
909         catch(exception& e) {
910                 errorOut(e, "Pintail", "createProcesses");
911                 exit(1);
912         }
913 }
914
915
916 /**************************************************************************************************/
917
918 void Pintail::createProcessesQuan() {
919         try {
920 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
921                 int process = 0;
922                 vector<int> processIDS;
923                                 
924                 //loop through and create all the processes you want
925                 while (process != processors) {
926                         int pid = fork();
927                         
928                         if (pid > 0) {
929                                 processIDS.push_back(pid);  
930                                 process++;
931                         }else if (pid == 0){
932                                 
933                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end);
934                                 
935                                 //write out data to file so parent can read it
936                                 ofstream out;
937                                 string s = toString(getpid()) + ".temp";
938                                 openOutputFile(s, out);
939                                 
940                                                                 
941                                 //output observed distances
942                                 for (int i = 0; i < quantilesMembers.size(); i++) {
943                                         out << quantilesMembers[i].size() << '\t';
944                                         for (int j = 0; j < quantilesMembers[i].size(); j++) {
945                                                 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
946                                         }
947                                         out << endl;
948                                 }
949                                 
950                                 out.close();
951                                 
952                                 exit(0);
953                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
954                 }
955                 
956                 //force parent to wait until all the processes are done
957                 for (int i=0;i<processors;i++) { 
958                         int temp = processIDS[i];
959                         wait(&temp);
960                 }
961
962                 //get data created by processes
963                 for (int i=0;i<processors;i++) { 
964                         ifstream in;
965                         string s = toString(processIDS[i]) + ".temp";
966                         openInputFile(s, in);
967                         
968                         vector< vector<quanMember> > quan; 
969                         quan.resize(100);
970                         
971                         //get quantiles
972                         for (int m = 0; m < quan.size(); m++) {
973                                 int num;
974                                 in >> num; 
975                                 
976                                 gobble(in);
977
978                                 vector<quanMember> q;  float w; int b, n;
979                                 for (int j = 0; j < num; j++) {
980                                         in >> w >> b >> n;
981         //cout << w << '\t' << b << '\t' n << endl;
982                                         quanMember newMember(w, b, n);
983                                         q.push_back(newMember);
984                                 }
985 //cout << "here" << endl;
986                                 quan[m] = q;
987 //cout << "now here" << endl;
988                                 gobble(in);
989                         }
990                         
991         
992                         //save quan in quantiles
993                         for (int j = 0; j < quan.size(); j++) {
994                                 //put all values of q[i] into quan[i]
995                                 for (int l = 0; l < quan[j].size(); l++) {  quantilesMembers[j].push_back(quan[j][l]);   }
996                                 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
997                         }
998                                         
999                         in.close();
1000                         remove(s.c_str());
1001                 }
1002                 
1003 #else
1004                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
1005 #endif          
1006         }
1007         catch(exception& e) {
1008                 errorOut(e, "Pintail", "createProcessesQuan");
1009                 exit(1);
1010         }
1011 }
1012
1013
1014 //***************************************************************************************************************
1015
1016