]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
started adding chimeraslayer method and fixed minor bug in treegroupscommand deconstr...
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13
14 //********************************************************************************************************************
15 //sorts lowest to highest
16 inline bool compareQuanMembers(quanMember left, quanMember right){
17         return (left.score < right.score);      
18
19 //***************************************************************************************************************
20
21 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
22 //***************************************************************************************************************
23
24 Pintail::~Pintail() {
25         try {
26                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
27                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
28                 for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
29         }
30         catch(exception& e) {
31                 errorOut(e, "Pintail", "~Pintail");
32                 exit(1);
33         }
34 }       
35 //***************************************************************************************************************
36 void Pintail::print(ostream& out) {
37         try {
38                 
39                 mothurOutEndLine();
40                 
41                 for (int i = 0; i < querySeqs.size(); i++) {
42                         
43                         int index = ceil(deviation[i]);
44                                                 
45                         //is your DE value higher than the 95%
46                         string chimera;
47                         if (quantiles[index][4] == 0.0) {
48                                 chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index);
49                         }else {
50                                 if (DE[i] > quantiles[index][4])                {       chimera = "Yes";        }
51                                 else                                                                    {       chimera = "No";         }
52                         }
53                         
54                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
55                         if (chimera == "Yes") {
56                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
57                         }
58                         out << "Observed\t";
59                         
60                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
61                         out << endl;
62                         
63                         out << "Expected\t";
64                         
65                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
66                         out << endl;
67                         
68                 }
69         }
70         catch(exception& e) {
71                 errorOut(e, "Pintail", "print");
72                 exit(1);
73         }
74 }
75
76 //***************************************************************************************************************
77 void Pintail::getChimeras() {
78         try {
79                 
80                 //read in query sequences and subject sequences
81                 mothurOut("Reading sequences and template file... "); cout.flush();
82                 querySeqs = readSeqs(fastafile);
83                 templateSeqs = readSeqs(templateFile);
84                 mothurOut("Done."); mothurOutEndLine();
85                 
86                 int numSeqs = querySeqs.size();
87                 
88                 obsDistance.resize(numSeqs);
89                 expectedDistance.resize(numSeqs);
90                 seqCoef.resize(numSeqs);
91                 DE.resize(numSeqs);
92                 Qav.resize(numSeqs);
93                 bestfit.resize(numSeqs);
94                 deviation.resize(numSeqs);
95                 trimmed.resize(numSeqs);
96                 windowSizes.resize(numSeqs, window);
97                 windowSizesTemplate.resize(templateSeqs.size(), window);
98                 windowsForeachQuery.resize(numSeqs);
99                 h.resize(numSeqs);
100                 quantiles.resize(100);  //one for every percent mismatch
101                 quantilesMembers.resize(100);  //one for every percent mismatch
102                 
103                 //break up file if needed
104                 int linesPerProcess = numSeqs / processors ;
105                 
106                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
107                         //find breakup of sequences for all times we will Parallelize
108                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
109                         else {
110                                 //fill line pairs
111                                 for (int i = 0; i < (processors-1); i++) {                      
112                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
113                                 }
114                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
115                                 int i = processors - 1;
116                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
117                         }
118                         
119                         //find breakup of templatefile for quantiles
120                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
121                         else { 
122                                 for (int i = 0; i < processors; i++) {
123                                         templateLines.push_back(new linePair());
124                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
125                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
126                                 }
127                         }
128                 #else
129                         lines.push_back(new linePair(0, numSeqs));
130                         templateLines.push_back(new linePair(0, templateSeqs.size()));
131                 #endif
132                 
133                 distcalculator = new eachGapDist();
134                 decalc = new DeCalculator();
135                 
136                 //if the user does enter a mask then you want to keep all the spots in the alignment
137                 if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
138                 else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
139                 
140                 decalc->setMask(seqMask);
141                 
142                 //find pairs
143                 if (processors == 1) { 
144                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
145                         bestfit = findPairs(lines[0]->start, lines[0]->end);
146                         mothurOut("Done."); mothurOutEndLine();
147                 }else {         createProcessesPairs();         }
148                 
149 //string o = "closestmatch.eachgap.fasta";
150 //ofstream out7;
151 //openOutputFile(o, out7);
152
153 //for (int i = 0; i < bestfit.size(); i++) {
154         //out7 << ">" << querySeqs[i]->getName() << "-"<< bestfit[i]->getName() << endl;
155         //out7 << bestfit[i]->getAligned() << endl;
156 //}             
157 //out7.close(); 
158                 //find P
159                 mothurOut("Getting conservation... "); cout.flush();
160                 if (consfile == "") { 
161                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
162                         probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); 
163                         mothurOut("Done."); mothurOutEndLine();
164                 }else                           {   probabilityProfile = readFreq();                      }
165
166                 //make P into Q
167                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
168                 mothurOut("Done."); mothurOutEndLine();
169                 
170                 //mask sequences if the user wants to 
171                 if (seqMask != "") {
172                         //mask querys
173                         for (int i = 0; i < querySeqs.size(); i++) {
174                                 decalc->runMask(querySeqs[i]);
175                         }
176                 
177                         //mask templates
178                         for (int i = 0; i < templateSeqs.size(); i++) {
179                                 decalc->runMask(templateSeqs[i]);
180                         }
181                         
182                         for (int i = 0; i < bestfit.size(); i++) { 
183                                 decalc->runMask(bestfit[i]);
184                         }
185
186                 }
187                 
188                 if (filter) {
189                         vector<Sequence*> temp = templateSeqs;
190                         for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]);  }
191                         
192                         createFilter(temp);
193                         
194                         runFilter(querySeqs);
195                         runFilter(templateSeqs);
196                         runFilter(bestfit);
197                 }
198                                                                                 
199                                                                                                                                                 
200                 if (processors == 1) { 
201         
202                         for (int j = 0; j < bestfit.size(); j++) { 
203                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
204                         }
205                         
206                         mothurOut("Finding window breaks... "); cout.flush();
207                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
208                                 it = trimmed[i].begin();
209                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
210                                 windowsForeachQuery[i] = win;
211                         }
212                         mothurOut("Done."); mothurOutEndLine();
213                 
214                 }else {         createProcessesSpots();         }
215                 
216                 if (processors == 1) { 
217                                                 
218                         mothurOut("Calculating observed distance... "); cout.flush();
219                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
220                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
221         
222                                 obsDistance[i] = obsi;
223                         }
224                         mothurOut("Done."); mothurOutEndLine();
225                         
226                         
227                         mothurOut("Finding variability... "); cout.flush();
228                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
229                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
230
231                                 Qav[i] = q;
232                         }
233                         mothurOut("Done."); mothurOutEndLine();
234                         
235                         
236                         mothurOut("Calculating alpha... "); cout.flush();
237                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
238                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
239                                 seqCoef[i] = alpha;
240                         }
241                         mothurOut("Done."); mothurOutEndLine();
242                 
243                 
244                         mothurOut("Calculating expected distance... "); cout.flush();
245                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
246                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
247                                 expectedDistance[i] = exp;
248                         }
249                         mothurOut("Done."); mothurOutEndLine();
250                         
251                         
252                         mothurOut("Finding deviation... "); cout.flush();
253                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
254                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
255                                 DE[i] = de;
256                         
257                                 it = trimmed[i].begin();
258                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
259                                 deviation[i] = dist;
260                         }
261                         mothurOut("Done."); mothurOutEndLine();
262                         
263                 } 
264                 else {          createProcesses();              }
265                 
266                 //quantiles are used to determine whether the de values found indicate a chimera
267                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
268                 //combination of sequences in the template
269                 if (quanfile != "") {  quantiles = readQuantiles();  }
270                 else {
271                         
272                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
273                         if (processors == 1) { 
274                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
275                         }else {         createProcessesQuan();          }
276                 
277                         
278                         ofstream out4, out5;
279                         string noOutliers, outliers;
280                         
281                         if ((!filter) && (seqMask == "")) {
282                                 noOutliers = getRootName(templateFile) + "pintail.quan";
283                         }else if ((filter) && (seqMask == "")) { 
284                                 noOutliers = getRootName(templateFile) + "pintail.filtered.quan";
285                         }else if ((!filter) && (seqMask != "")) { 
286                                 noOutliers = getRootName(templateFile) + "pintail.masked.quan";
287                         }else if ((filter) && (seqMask != "")) { 
288                                 noOutliers = getRootName(templateFile) + "pintail.filtered.masked.quan";
289                         }
290
291                         //outliers = getRootName(templateFile) + "pintail.quanYESOUTLIERS";
292                         
293                         /*openOutputFile(outliers, out4);
294                         
295                         //adjust quantiles
296                         for (int i = 0; i < quantilesMembers.size(); i++) {
297                                 vector<float> temp;
298                                 
299                                 if (quantilesMembers[i].size() == 0) {
300                                         //in case this is not a distance found in your template files
301                                         for (int g = 0; g < 6; g++) {
302                                                 temp.push_back(0.0);
303                                         }
304                                 }else{
305                                         
306                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
307                                         
308                                         //save 10%
309                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
310                                         //save 25%
311                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
312                                         //save 50%
313                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
314                                         //save 75%
315                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
316                                         //save 95%
317                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
318                                         //save 99%
319                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
320                                         
321                                 }
322                                 
323                                 //output quan value
324                                 out4 << i+1 << '\t';                            
325                                 for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
326                                 out4 << endl;
327                                 
328                                 quantiles[i] = temp;
329                                 
330                         }
331                         
332                         out4.close();*/
333                         
334                         decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
335                         
336                         openOutputFile(noOutliers, out5);
337                         
338                         //adjust quantiles
339                         for (int i = 0; i < quantilesMembers.size(); i++) {
340                                 vector<float> temp;
341                                 
342                                 if (quantilesMembers[i].size() == 0) {
343                                         //in case this is not a distance found in your template files
344                                         for (int g = 0; g < 6; g++) {
345                                                 temp.push_back(0.0);
346                                         }
347                                 }else{
348                                         
349                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
350                                         
351                                         //save 10%
352                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
353                                         //save 25%
354                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
355                                         //save 50%
356                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
357                                         //save 75%
358                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
359                                         //save 95%
360                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
361                                         //save 99%
362                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
363                                         
364                                 }
365                                 
366                                 //output quan value
367                                 out5 << i+1 << '\t';                            
368                                 for (int u = 0; u < temp.size(); u++) {   out5 << temp[u] << '\t'; }
369                                 out5 << endl;
370                                 
371                                 quantiles[i] = temp;
372                                 
373                         }
374
375                         mothurOut("Done."); mothurOutEndLine();
376                         
377                 }
378                         
379                 //free memory
380                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
381                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
382                         
383                 delete distcalculator;
384                 delete decalc;
385         }
386         catch(exception& e) {
387                 errorOut(e, "Pintail", "getChimeras");
388                 exit(1);
389         }
390 }
391
392 //***************************************************************************************************************
393
394 vector<float> Pintail::readFreq() {
395         try {
396         
397                 ifstream in;
398                 openInputFile(consfile, in);
399                 
400                 vector<float> prob;
401                 set<int> h = decalc->getPos();  //positions of bases in masking sequence
402                 
403                 //read in probabilities and store in vector
404                 int pos; float num; 
405                 
406                 while(!in.eof()){
407                         
408                         in >> pos >> num;
409                         
410                         if (h.count(pos) > 0) {
411                                 float Pi;
412                                 Pi =  (num - 0.25) / 0.75; 
413                         
414                                 //cannot have probability less than 0.
415                                 if (Pi < 0) { Pi = 0.0; }
416
417                                 //do you want this spot
418                                 prob.push_back(Pi);  
419                         }
420                         
421                         gobble(in);
422                 }
423                 
424                 in.close();
425                 return prob;
426                 
427         }
428         catch(exception& e) {
429                 errorOut(e, "Pintail", "readFreq");
430                 exit(1);
431         }
432 }
433
434 //***************************************************************************************************************
435 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
436 vector<Sequence*> Pintail::findPairs(int start, int end) {
437         try {
438                 
439                 vector<Sequence*> seqsMatches;  
440                 
441                 for(int i = start; i < end; i++){
442                         
443                         vector<Sequence*> copy = decalc->findClosest(querySeqs[i], templateSeqs, 1);
444                         seqsMatches.push_back(copy[0]);
445                 }
446                 
447                 return seqsMatches;
448         
449         }
450         catch(exception& e) {
451                 errorOut(e, "Pintail", "findPairs");
452                 exit(1);
453         }
454 }
455
456 /**************************************************************************************************/
457
458 void Pintail::createProcessesSpots() {
459         try {
460 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
461                 int process = 0;
462                 vector<int> processIDS;
463                 
464                 //loop through and create all the processes you want
465                 while (process != processors) {
466                         int pid = fork();
467                         
468                         if (pid > 0) {
469                                 processIDS.push_back(pid);  
470                                 process++;
471                         }else if (pid == 0){
472                                 
473                                 for (int j = lines[process]->start; j < lines[process]->end; j++) {
474                                 
475                                         //chops off beginning and end of sequences so they both start and end with a base
476                                         map<int, int> trim;
477
478                                         decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
479                                         trimmed[j] = trim;
480                                         
481                                 }
482
483                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
484                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
485                                         it = trimmed[i].begin();
486                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
487                                 }
488                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
489                                 
490                                 //write out data to file so parent can read it
491                                 ofstream out;
492                                 string s = toString(getpid()) + ".temp";
493                                 openOutputFile(s, out);
494                                 
495                                 //output windowsForeachQuery
496                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
497                                         out << windowsForeachQuery[i].size() << '\t';
498                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
499                                                 out << windowsForeachQuery[i][j] << '\t';
500                                         }
501                                         out << endl;
502                                 }
503                         
504                                 //output windowSizes
505                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
506                                         out << windowSizes[i] << '\t';
507                                 }
508                                 out << endl;
509                                 
510                                 //output trimmed values
511                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
512                                         it = trimmed[i].begin();                                        
513                                         out << it->first << '\t' << it->second << endl;
514                                 }
515                                 out.close();
516                                 
517                                 exit(0);
518                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
519                 }
520                 
521                 //force parent to wait until all the processes are done
522                 for (int i=0;i<processors;i++) { 
523                         int temp = processIDS[i];
524                         wait(&temp);
525                 }
526                 
527                 //get data created by processes
528                 for (int i=0;i<processors;i++) { 
529                         ifstream in;
530                         string s = toString(processIDS[i]) + ".temp";
531                         openInputFile(s, in);
532                         
533                         int size = lines[i]->end - lines[i]->start;
534                                         
535                         int count = lines[i]->start;
536                         for (int m = 0; m < size; m++) {
537                                 int num;
538                                 in >> num;
539                         
540                                 vector<int> win;  int w;
541                                 for (int j = 0; j < num; j++) {
542                                         in >> w;
543                                         win.push_back(w);
544                                 }
545                         
546                                 windowsForeachQuery[count] = win;
547                                 count++;
548                                 gobble(in);
549                         }
550                 
551                         gobble(in);
552                         count = lines[i]->start;
553                         for (int m = 0; m < size; m++) {
554                                 int num;
555                                 in >> num;
556                                 
557                                 windowSizes[count] = num;
558                                 count++;
559                         }
560                         
561                         gobble(in);
562                         
563                         count = lines[i]->start;
564                         for (int m = 0; m < size; m++) {
565                                 int front, back;
566                                 in >> front >> back;
567                         
568                                 map<int, int> t;
569                                 
570                                 t[front] = back;
571                                 
572                                 trimmed[count] = t;
573                                 count++;
574                                 
575                                 gobble(in);
576                         }
577
578                         
579                         in.close();
580                         remove(s.c_str());
581                 }
582                         
583         
584 #else
585                 for (int j = 0; j < bestfit.size(); j++) {
586                         //chops off beginning and end of sequences so they both start and end with a base
587                         decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
588                 }
589
590                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
591                                 it = trimmed[i].begin();
592                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
593                                 windowsForeachQuery[i] = win;
594                 }
595
596 #endif          
597         }
598         catch(exception& e) {
599                 errorOut(e, "Pintail", "createProcessesSpots");
600                 exit(1);
601         }
602 }
603 /**************************************************************************************************/
604
605 void Pintail::createProcessesPairs() {
606         try {
607 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
608                 int process = 0;
609                 vector<int> processIDS;
610                 
611                 //loop through and create all the processes you want
612                 while (process != processors) {
613                         int pid = fork();
614                         
615                         if (pid > 0) {
616                                 processIDS.push_back(pid);  
617                                 process++;
618                         }else if (pid == 0){
619                                 
620                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
621                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
622                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
623                                 
624                                 //write out data to file so parent can read it
625                                 ofstream out;
626                                 string s = toString(getpid()) + ".temp";
627                                 openOutputFile(s, out);
628                                 
629                                 //output range and size
630                                 out << bestfit.size() << endl;
631                                 
632                                 //output pairs
633                                 for (int i = 0; i < bestfit.size(); i++) {
634                                         out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
635                                 }
636                                 out.close();
637                                 
638                                 exit(0);
639                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
640                 }
641                 
642                 //force parent to wait until all the processes are done
643                 for (int i=0;i<processors;i++) { 
644                         int temp = processIDS[i];
645                         wait(&temp);
646                 }
647                 
648                 //get data created by processes
649                 for (int i=0;i<processors;i++) { 
650                         ifstream in;
651                         string s = toString(processIDS[i]) + ".temp";
652                         openInputFile(s, in);
653                         
654                         int size;
655                         in >> size;  gobble(in);
656                                 
657                         //get pairs
658                         int count = lines[i]->start;
659                         for (int m = 0; m < size; m++) {
660                                 Sequence* temp = new Sequence(in);
661                                 bestfit[count] = temp;
662                         
663                                 count++;
664                                 gobble(in);
665                         }
666                         
667                         in.close();
668                         remove(s.c_str());
669                 }
670                         
671         
672 #else
673                 bestfit = findPairs(lines[0]->start, lines[0]->end);
674 #endif          
675         }
676         catch(exception& e) {
677                 errorOut(e, "Pintail", "createProcessesPairs");
678                 exit(1);
679         }
680 }
681 /**************************************************************************************************/
682
683 void Pintail::createProcesses() {
684         try {
685 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
686                 int process = 0;
687                 vector<int> processIDS;
688                 
689                 //loop through and create all the processes you want
690                 while (process != processors) {
691                         int pid = fork();
692                         
693                         if (pid > 0) {
694                                 processIDS.push_back(pid);  
695                                 process++;
696                         }else if (pid == 0){
697                                 
698                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
699                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
700                                         
701                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
702                                         obsDistance[i] = obsi;
703                                 
704                                         //calc Qav
705                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
706                                         
707                                         //get alpha
708                                         float alpha = decalc->getCoef(obsDistance[i], q);
709                                         
710                                         //find expected
711                                         vector<float> exp = decalc->calcExpected(q, alpha);
712                                         expectedDistance[i] = exp;
713                                         
714                                         //get de and deviation
715                                         float dei = decalc->calcDE(obsi, exp);
716                                         DE[i] = dei;
717                                         
718                                         it = trimmed[i].begin();
719                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
720                                         deviation[i] = dist;
721                                 }
722                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
723                                 
724                                 //write out data to file so parent can read it
725                                 ofstream out;
726                                 string s = toString(getpid()) + ".temp";
727                                 openOutputFile(s, out);
728                                 
729                                 int size = lines[process]->end - lines[process]->start;
730                                 out << size << endl;
731                                                                 
732                                 //output observed distances
733                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
734                                         out << obsDistance[i].size() << '\t';
735                                         for (int j = 0; j < obsDistance[i].size(); j++) {
736                                                 out << obsDistance[i][j] << '\t';
737                                         }
738                                         out << endl;
739                                 }
740                                 
741                                 
742                                 //output expected distances
743                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
744                                         out << expectedDistance[i].size() << '\t';
745                                         for (int j = 0; j < expectedDistance[i].size(); j++) {
746                                                 out << expectedDistance[i][j] << '\t';
747                                         }
748                                         out << endl;
749                                 }
750
751                         
752                                 //output de values
753                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
754                                         out << DE[i] << '\t';
755                                 }
756                                 out << endl;    
757                                 
758                                 //output de values
759                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
760                                         out << deviation[i] << '\t';
761                                 }
762                                 out << endl;
763                                 
764                                 out.close();
765
766                                 exit(0);
767                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
768                 }
769                 
770                 //force parent to wait until all the processes are done
771                 for (int i=0;i<processors;i++) { 
772                         int temp = processIDS[i];
773                         wait(&temp);
774                 }
775                 
776                 //get data created by processes
777                 for (int i=0;i<processors;i++) { 
778                         ifstream in;
779                         string s = toString(processIDS[i]) + ".temp";
780                         openInputFile(s, in);
781                         
782                         int size;
783                         in >> size;  gobble(in);
784                         
785                         //get observed distances
786                         int count = lines[i]->start;
787                         for (int m = 0; m < size; m++) {
788                                 int num;
789                                 in >> num;
790                         
791                                 vector<float> obs;  float w;
792                                 for (int j = 0; j < num; j++) {
793                                         in >> w;
794                                         obs.push_back(w);
795                                 }
796                         
797                                 obsDistance[count] = obs;
798                                 count++;
799                                 gobble(in);
800                         }
801                         
802                         gobble(in);
803                         
804                         //get expected distances
805                         count = lines[i]->start;
806                         for (int m = 0; m < size; m++) {
807                                 int num;
808                                 in >> num;
809                         
810                                 vector<float> exp;  float w;
811                                 for (int j = 0; j < num; j++) {
812                                         in >> w;
813                                         exp.push_back(w);
814                                 }
815                         
816                                 expectedDistance[count] = exp;
817                                 count++;
818                                 gobble(in);
819                         }
820
821                         gobble(in);
822                         
823                         count = lines[i]->start;
824                         for (int m = 0; m < size; m++) {
825                                 float num;
826                                 in >> num;
827                                 
828                                 DE[count] = num;
829                                 count++;
830                         }
831                         
832                         gobble(in);
833                         
834                         count = lines[i]->start;
835                         for (int m = 0; m < size; m++) {
836                                 float num;
837                                 in >> num;
838                                 
839                                 deviation[count] = num;
840                                 count++;
841                         }
842
843                         in.close();
844                         remove(s.c_str());
845                 }
846
847                                 
848 #else
849                         mothurOut("Calculating observed distance... "); cout.flush();
850                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
851                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
852                                 obsDistance[i] = obsi;
853                         }
854                         mothurOut("Done."); mothurOutEndLine();
855                         
856                         
857                         
858                         mothurOut("Finding variability... "); cout.flush();
859                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
860                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
861                                 Qav[i] = q;
862                         }
863                         mothurOut("Done."); mothurOutEndLine();
864                         
865                         
866                         
867                         mothurOut("Calculating alpha... "); cout.flush();
868                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
869                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
870                                 seqCoef.push_back(alpha);
871                         }
872                         mothurOut("Done."); mothurOutEndLine();
873                 
874                 
875                 
876                         mothurOut("Calculating expected distance... "); cout.flush();
877                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
878                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
879                                 expectedDistance[i] = exp;
880                         }
881                         mothurOut("Done."); mothurOutEndLine();
882                         
883                         
884                         
885                         mothurOut("Finding deviation... "); cout.flush();
886                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
887                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
888                                 DE[i] = de;
889                                 
890                                 it = trimmed[i].begin();
891                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
892                                 deviation[i] = dist;
893                         }
894                         mothurOut("Done."); mothurOutEndLine();
895
896 #endif          
897         }
898         catch(exception& e) {
899                 errorOut(e, "Pintail", "createProcesses");
900                 exit(1);
901         }
902 }
903
904
905 /**************************************************************************************************/
906
907 void Pintail::createProcessesQuan() {
908         try {
909 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
910                 int process = 0;
911                 vector<int> processIDS;
912                                 
913                 //loop through and create all the processes you want
914                 while (process != processors) {
915                         int pid = fork();
916                         
917                         if (pid > 0) {
918                                 processIDS.push_back(pid);  
919                                 process++;
920                         }else if (pid == 0){
921                                 
922                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end);
923                                 
924                                 //write out data to file so parent can read it
925                                 ofstream out;
926                                 string s = toString(getpid()) + ".temp";
927                                 openOutputFile(s, out);
928                                 
929                                                                 
930                                 //output observed distances
931                                 for (int i = 0; i < quantilesMembers.size(); i++) {
932                                         out << quantilesMembers[i].size() << '\t';
933                                         for (int j = 0; j < quantilesMembers[i].size(); j++) {
934                                                 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
935                                         }
936                                         out << endl;
937                                 }
938                                 
939                                 out.close();
940                                 
941                                 exit(0);
942                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
943                 }
944                 
945                 //force parent to wait until all the processes are done
946                 for (int i=0;i<processors;i++) { 
947                         int temp = processIDS[i];
948                         wait(&temp);
949                 }
950
951                 //get data created by processes
952                 for (int i=0;i<processors;i++) { 
953                         ifstream in;
954                         string s = toString(processIDS[i]) + ".temp";
955                         openInputFile(s, in);
956                         
957                         vector< vector<quanMember> > quan; 
958                         quan.resize(100);
959                         
960                         //get quantiles
961                         for (int m = 0; m < quan.size(); m++) {
962                                 int num;
963                                 in >> num; 
964                                 
965                                 gobble(in);
966
967                                 vector<quanMember> q;  float w; int b, n;
968                                 for (int j = 0; j < num; j++) {
969                                         in >> w >> b >> n;
970         //cout << w << '\t' << b << '\t' n << endl;
971                                         quanMember newMember(w, b, n);
972                                         q.push_back(newMember);
973                                 }
974 //cout << "here" << endl;
975                                 quan[m] = q;
976 //cout << "now here" << endl;
977                                 gobble(in);
978                         }
979                         
980         
981                         //save quan in quantiles
982                         for (int j = 0; j < quan.size(); j++) {
983                                 //put all values of q[i] into quan[i]
984                                 for (int l = 0; l < quan[j].size(); l++) {  quantilesMembers[j].push_back(quan[j][l]);   }
985                                 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
986                         }
987                                         
988                         in.close();
989                         remove(s.c_str());
990                 }
991                 
992 #else
993                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
994 #endif          
995         }
996         catch(exception& e) {
997                 errorOut(e, "Pintail", "createProcessesQuan");
998                 exit(1);
999         }
1000 }
1001
1002
1003 //***************************************************************************************************************
1004
1005