]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
finished with ccode, returned bellerophon to last save before move, cleaned up pintai...
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13
14 //********************************************************************************************************************
15 //sorts lowest to highest
16 inline bool compareQuanMembers(quanMember left, quanMember right){
17         return (left.score < right.score);      
18
19 //***************************************************************************************************************
20
21 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
22 //***************************************************************************************************************
23
24 Pintail::~Pintail() {
25         try {
26                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
27                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
28                 for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
29         }
30         catch(exception& e) {
31                 errorOut(e, "Pintail", "~Pintail");
32                 exit(1);
33         }
34 }       
35 //***************************************************************************************************************
36 void Pintail::print(ostream& out) {
37         try {
38                 
39                 mothurOutEndLine();
40                 
41                 for (int i = 0; i < querySeqs.size(); i++) {
42                         
43                         int index = ceil(deviation[i]);
44                                                 
45                         //is your DE value higher than the 95%
46                         string chimera;
47                         if (quantiles[index][4] == 0.0) {
48                                 chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index);
49                         }else {
50                                 if (DE[i] > quantiles[index][4])                {       chimera = "Yes";        }
51                                 else                                                                    {       chimera = "No";         }
52                         }
53                         
54                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
55                         if (chimera == "Yes") {
56                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
57                         }
58                         out << "Observed\t";
59                         
60                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
61                         out << endl;
62                         
63                         out << "Expected\t";
64                         
65                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
66                         out << endl;
67                         
68                 }
69         }
70         catch(exception& e) {
71                 errorOut(e, "Pintail", "print");
72                 exit(1);
73         }
74 }
75
76 //***************************************************************************************************************
77 void Pintail::getChimeras() {
78         try {
79                 
80                 //read in query sequences and subject sequences
81                 mothurOut("Reading sequences and template file... "); cout.flush();
82                 querySeqs = readSeqs(fastafile);
83                 templateSeqs = readSeqs(templateFile);
84                 mothurOut("Done."); mothurOutEndLine();
85                 
86                 int numSeqs = querySeqs.size();
87                 
88                 obsDistance.resize(numSeqs);
89                 expectedDistance.resize(numSeqs);
90                 seqCoef.resize(numSeqs);
91                 DE.resize(numSeqs);
92                 Qav.resize(numSeqs);
93                 bestfit.resize(numSeqs);
94                 deviation.resize(numSeqs);
95                 trimmed.resize(numSeqs);
96                 windowSizes.resize(numSeqs, window);
97                 windowSizesTemplate.resize(templateSeqs.size(), window);
98                 windowsForeachQuery.resize(numSeqs);
99                 h.resize(numSeqs);
100                 quantiles.resize(100);  //one for every percent mismatch
101                 quantilesMembers.resize(100);  //one for every percent mismatch
102                 
103                 //break up file if needed
104                 int linesPerProcess = numSeqs / processors ;
105                 
106                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
107                         //find breakup of sequences for all times we will Parallelize
108                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
109                         else {
110                                 //fill line pairs
111                                 for (int i = 0; i < (processors-1); i++) {                      
112                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
113                                 }
114                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
115                                 int i = processors - 1;
116                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
117                         }
118                         
119                         //find breakup of templatefile for quantiles
120                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
121                         else { 
122                                 for (int i = 0; i < processors; i++) {
123                                         templateLines.push_back(new linePair());
124                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
125                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
126                                 }
127                         }
128                 #else
129                         lines.push_back(new linePair(0, numSeqs));
130                         templateLines.push_back(new linePair(0, templateSeqs.size()));
131                 #endif
132                 
133                 distcalculator = new eachGapDist();
134                 decalc = new DeCalculator();
135                 
136                 //if the user does enter a mask then you want to keep all the spots in the alignment
137                 if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
138                 else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
139                 
140                 decalc->setMask(seqMask);
141                 
142                 //find pairs
143                 if (processors == 1) { 
144                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
145                         bestfit = findPairs(lines[0]->start, lines[0]->end);
146                         mothurOut("Done."); mothurOutEndLine();
147                 }else {         createProcessesPairs();         }
148                 
149 /*string o = "foronlinepintailpairs-eachgap";
150 ofstream out7;
151 openOutputFile(o, out7);
152
153 for (int i = 0; i < bestfit.size(); i++) {
154         out7 << querySeqs[i]->getName() << endl;
155         out7 << querySeqs[i]->getUnaligned() << endl << endl;
156         
157         out7 << bestfit[i]->getName() << endl;
158         out7 << bestfit[i]->getUnaligned() << endl << endl << endl;
159 }               
160 out7.close();/*/        
161                 //find P
162                 mothurOut("Getting conservation... "); cout.flush();
163                 if (consfile == "") { 
164                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
165                         probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); 
166                         mothurOut("Done."); mothurOutEndLine();
167                 }else                           {   probabilityProfile = readFreq();                      }
168
169                 //make P into Q
170                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
171                 mothurOut("Done."); mothurOutEndLine();
172                 
173                 //mask sequences if the user wants to 
174                 if (seqMask != "") {
175                         //mask querys
176                         for (int i = 0; i < querySeqs.size(); i++) {
177                                 decalc->runMask(querySeqs[i]);
178                         }
179                 
180                         //mask templates
181                         for (int i = 0; i < templateSeqs.size(); i++) {
182                                 decalc->runMask(templateSeqs[i]);
183                         }
184                         
185                         for (int i = 0; i < bestfit.size(); i++) { 
186                                 decalc->runMask(bestfit[i]);
187                         }
188
189                 }
190                 
191                 if (filter) {
192                         vector<Sequence*> temp = templateSeqs;
193                         for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]);  }
194                         
195                         createFilter(temp);
196                         
197                         runFilter(querySeqs);
198                         runFilter(templateSeqs);
199                         runFilter(bestfit);
200                 }
201                                                                                 
202                                                                                                                                                 
203                 if (processors == 1) { 
204         
205                         for (int j = 0; j < bestfit.size(); j++) { 
206                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
207                         }
208                         
209                         mothurOut("Finding window breaks... "); cout.flush();
210                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
211                                 it = trimmed[i].begin();
212                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
213                                 windowsForeachQuery[i] = win;
214                         }
215                         mothurOut("Done."); mothurOutEndLine();
216                 
217                 }else {         createProcessesSpots();         }
218                 
219                 if (processors == 1) { 
220                                                 
221                         mothurOut("Calculating observed distance... "); cout.flush();
222                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
223                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
224         
225                                 obsDistance[i] = obsi;
226                         }
227                         mothurOut("Done."); mothurOutEndLine();
228                         
229                         
230                         mothurOut("Finding variability... "); cout.flush();
231                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
232                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
233
234                                 Qav[i] = q;
235                         }
236                         mothurOut("Done."); mothurOutEndLine();
237                         
238                         
239                         mothurOut("Calculating alpha... "); cout.flush();
240                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
241                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
242                                 seqCoef[i] = alpha;
243                         }
244                         mothurOut("Done."); mothurOutEndLine();
245                 
246                 
247                         mothurOut("Calculating expected distance... "); cout.flush();
248                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
249                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
250                                 expectedDistance[i] = exp;
251                         }
252                         mothurOut("Done."); mothurOutEndLine();
253                         
254                         
255                         mothurOut("Finding deviation... "); cout.flush();
256                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
257                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
258                                 DE[i] = de;
259                         
260                                 it = trimmed[i].begin();
261                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
262                                 deviation[i] = dist;
263                         }
264                         mothurOut("Done."); mothurOutEndLine();
265                         
266                 } 
267                 else {          createProcesses();              }
268                 
269                 //quantiles are used to determine whether the de values found indicate a chimera
270                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
271                 //combination of sequences in the template
272                 if (quanfile != "") {  quantiles = readQuantiles();  }
273                 else {
274                         
275                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
276                         if (processors == 1) { 
277                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
278                         }else {         createProcessesQuan();          }
279                 
280                         
281                         ofstream out4, out5;
282                         string noOutliers, outliers;
283                         
284                         if ((!filter) && (seqMask == "")) {
285                                 noOutliers = getRootName(templateFile) + "pintail.quan";
286                         }else if ((filter) && (seqMask == "")) { 
287                                 noOutliers = getRootName(templateFile) + "pintail.filtered.quan";
288                         }else if ((!filter) && (seqMask != "")) { 
289                                 noOutliers = getRootName(templateFile) + "pintail.masked.quan";
290                         }else if ((filter) && (seqMask != "")) { 
291                                 noOutliers = getRootName(templateFile) + "pintail.filtered.masked.quan";
292                         }
293
294                         //outliers = getRootName(templateFile) + "pintail.quanYESOUTLIERS";
295                         
296                         /*openOutputFile(outliers, out4);
297                         
298                         //adjust quantiles
299                         for (int i = 0; i < quantilesMembers.size(); i++) {
300                                 vector<float> temp;
301                                 
302                                 if (quantilesMembers[i].size() == 0) {
303                                         //in case this is not a distance found in your template files
304                                         for (int g = 0; g < 6; g++) {
305                                                 temp.push_back(0.0);
306                                         }
307                                 }else{
308                                         
309                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
310                                         
311                                         //save 10%
312                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
313                                         //save 25%
314                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
315                                         //save 50%
316                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
317                                         //save 75%
318                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
319                                         //save 95%
320                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
321                                         //save 99%
322                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
323                                         
324                                 }
325                                 
326                                 //output quan value
327                                 out4 << i+1 << '\t';                            
328                                 for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
329                                 out4 << endl;
330                                 
331                                 quantiles[i] = temp;
332                                 
333                         }
334                         
335                         out4.close();*/
336                         
337                         decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
338                         
339                         openOutputFile(noOutliers, out5);
340                         
341                         //adjust quantiles
342                         for (int i = 0; i < quantilesMembers.size(); i++) {
343                                 vector<float> temp;
344                                 
345                                 if (quantilesMembers[i].size() == 0) {
346                                         //in case this is not a distance found in your template files
347                                         for (int g = 0; g < 6; g++) {
348                                                 temp.push_back(0.0);
349                                         }
350                                 }else{
351                                         
352                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
353                                         
354                                         //save 10%
355                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
356                                         //save 25%
357                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
358                                         //save 50%
359                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
360                                         //save 75%
361                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
362                                         //save 95%
363                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
364                                         //save 99%
365                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
366                                         
367                                 }
368                                 
369                                 //output quan value
370                                 out5 << i+1 << '\t';                            
371                                 for (int u = 0; u < temp.size(); u++) {   out5 << temp[u] << '\t'; }
372                                 out5 << endl;
373                                 
374                                 quantiles[i] = temp;
375                                 
376                         }
377
378                         mothurOut("Done."); mothurOutEndLine();
379                         
380                 }
381                         
382                 //free memory
383                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
384                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
385                         
386                 delete distcalculator;
387                 delete decalc;
388         }
389         catch(exception& e) {
390                 errorOut(e, "Pintail", "getChimeras");
391                 exit(1);
392         }
393 }
394
395 //***************************************************************************************************************
396
397 vector<float> Pintail::readFreq() {
398         try {
399         
400                 ifstream in;
401                 openInputFile(consfile, in);
402                 
403                 vector<float> prob;
404                 set<int> h = decalc->getPos();  //positions of bases in masking sequence
405                 
406                 //read in probabilities and store in vector
407                 int pos; float num; 
408                 
409                 while(!in.eof()){
410                         
411                         in >> pos >> num;
412                         
413                         if (h.count(pos) > 0) {
414                                 float Pi;
415                                 Pi =  (num - 0.25) / 0.75; 
416                         
417                                 //cannot have probability less than 0.
418                                 if (Pi < 0) { Pi = 0.0; }
419
420                                 //do you want this spot
421                                 prob.push_back(Pi);  
422                         }
423                         
424                         gobble(in);
425                 }
426                 
427                 in.close();
428                 return prob;
429                 
430         }
431         catch(exception& e) {
432                 errorOut(e, "Pintail", "readFreq");
433                 exit(1);
434         }
435 }
436
437 //***************************************************************************************************************
438 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
439 vector<Sequence*> Pintail::findPairs(int start, int end) {
440         try {
441                 
442                 vector<Sequence*> seqsMatches;  
443                 
444                 for(int i = start; i < end; i++){
445                 
446                         float smallest = 10000.0;
447                         Sequence query = *(querySeqs[i]);
448                         Sequence* match;
449                         
450                         for(int j = 0; j < templateSeqs.size(); j++){
451                                 
452                                 Sequence temp = *(templateSeqs[j]);
453                                 
454                                 distcalculator->calcDist(query, temp);
455                                 float dist = distcalculator->getDist();
456                                 
457                                 if (dist < smallest) { 
458                                         match = templateSeqs[j];
459                                         smallest = dist;
460                                 }
461                         }
462                         
463                         //make copy so trimSeqs doesn't overtrim
464                         Sequence* copy = new Sequence(match->getName(), match->getAligned());
465                         
466                         seqsMatches.push_back(copy);
467                 }
468                 
469                 return seqsMatches;
470         
471         }
472         catch(exception& e) {
473                 errorOut(e, "Pintail", "findPairs");
474                 exit(1);
475         }
476 }
477
478 /**************************************************************************************************/
479
480 void Pintail::createProcessesSpots() {
481         try {
482 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
483                 int process = 0;
484                 vector<int> processIDS;
485                 
486                 //loop through and create all the processes you want
487                 while (process != processors) {
488                         int pid = fork();
489                         
490                         if (pid > 0) {
491                                 processIDS.push_back(pid);  
492                                 process++;
493                         }else if (pid == 0){
494                                 
495                                 for (int j = lines[process]->start; j < lines[process]->end; j++) {
496                                 
497                                         //chops off beginning and end of sequences so they both start and end with a base
498                                         map<int, int> trim;
499
500                                         decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
501                                         trimmed[j] = trim;
502                                         
503                                 }
504
505                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
506                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
507                                         it = trimmed[i].begin();
508                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
509                                 }
510                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
511                                 
512                                 //write out data to file so parent can read it
513                                 ofstream out;
514                                 string s = toString(getpid()) + ".temp";
515                                 openOutputFile(s, out);
516                                 
517                                 //output windowsForeachQuery
518                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
519                                         out << windowsForeachQuery[i].size() << '\t';
520                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
521                                                 out << windowsForeachQuery[i][j] << '\t';
522                                         }
523                                         out << endl;
524                                 }
525                         
526                                 //output windowSizes
527                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
528                                         out << windowSizes[i] << '\t';
529                                 }
530                                 out << endl;
531                                 
532                                 //output trimmed values
533                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
534                                         it = trimmed[i].begin();                                        
535                                         out << it->first << '\t' << it->second << endl;
536                                 }
537                                 out.close();
538                                 
539                                 exit(0);
540                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
541                 }
542                 
543                 //force parent to wait until all the processes are done
544                 for (int i=0;i<processors;i++) { 
545                         int temp = processIDS[i];
546                         wait(&temp);
547                 }
548                 
549                 //get data created by processes
550                 for (int i=0;i<processors;i++) { 
551                         ifstream in;
552                         string s = toString(processIDS[i]) + ".temp";
553                         openInputFile(s, in);
554                         
555                         int size = lines[i]->end - lines[i]->start;
556                                         
557                         int count = lines[i]->start;
558                         for (int m = 0; m < size; m++) {
559                                 int num;
560                                 in >> num;
561                         
562                                 vector<int> win;  int w;
563                                 for (int j = 0; j < num; j++) {
564                                         in >> w;
565                                         win.push_back(w);
566                                 }
567                         
568                                 windowsForeachQuery[count] = win;
569                                 count++;
570                                 gobble(in);
571                         }
572                 
573                         gobble(in);
574                         count = lines[i]->start;
575                         for (int m = 0; m < size; m++) {
576                                 int num;
577                                 in >> num;
578                                 
579                                 windowSizes[count] = num;
580                                 count++;
581                         }
582                         
583                         gobble(in);
584                         
585                         count = lines[i]->start;
586                         for (int m = 0; m < size; m++) {
587                                 int front, back;
588                                 in >> front >> back;
589                         
590                                 map<int, int> t;
591                                 
592                                 t[front] = back;
593                                 
594                                 trimmed[count] = t;
595                                 count++;
596                                 
597                                 gobble(in);
598                         }
599
600                         
601                         in.close();
602                         remove(s.c_str());
603                 }
604                         
605         
606 #else
607                 for (int j = 0; j < bestfit.size(); j++) {
608                         //chops off beginning and end of sequences so they both start and end with a base
609                         decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
610                 }
611
612                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
613                                 it = trimmed[i].begin();
614                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
615                                 windowsForeachQuery[i] = win;
616                 }
617
618 #endif          
619         }
620         catch(exception& e) {
621                 errorOut(e, "Pintail", "createProcessesSpots");
622                 exit(1);
623         }
624 }
625 /**************************************************************************************************/
626
627 void Pintail::createProcessesPairs() {
628         try {
629 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
630                 int process = 0;
631                 vector<int> processIDS;
632                 
633                 //loop through and create all the processes you want
634                 while (process != processors) {
635                         int pid = fork();
636                         
637                         if (pid > 0) {
638                                 processIDS.push_back(pid);  
639                                 process++;
640                         }else if (pid == 0){
641                                 
642                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
643                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
644                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
645                                 
646                                 //write out data to file so parent can read it
647                                 ofstream out;
648                                 string s = toString(getpid()) + ".temp";
649                                 openOutputFile(s, out);
650                                 
651                                 //output range and size
652                                 out << bestfit.size() << endl;
653                                 
654                                 //output pairs
655                                 for (int i = 0; i < bestfit.size(); i++) {
656                                         out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
657                                 }
658                                 out.close();
659                                 
660                                 exit(0);
661                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
662                 }
663                 
664                 //force parent to wait until all the processes are done
665                 for (int i=0;i<processors;i++) { 
666                         int temp = processIDS[i];
667                         wait(&temp);
668                 }
669                 
670                 //get data created by processes
671                 for (int i=0;i<processors;i++) { 
672                         ifstream in;
673                         string s = toString(processIDS[i]) + ".temp";
674                         openInputFile(s, in);
675                         
676                         int size;
677                         in >> size;  gobble(in);
678                                 
679                         //get pairs
680                         int count = lines[i]->start;
681                         for (int m = 0; m < size; m++) {
682                                 Sequence* temp = new Sequence(in);
683                                 bestfit[count] = temp;
684                         
685                                 count++;
686                                 gobble(in);
687                         }
688                         
689                         in.close();
690                         remove(s.c_str());
691                 }
692                         
693         
694 #else
695                 bestfit = findPairs(lines[0]->start, lines[0]->end);
696 #endif          
697         }
698         catch(exception& e) {
699                 errorOut(e, "Pintail", "createProcessesPairs");
700                 exit(1);
701         }
702 }
703 /**************************************************************************************************/
704
705 void Pintail::createProcesses() {
706         try {
707 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
708                 int process = 0;
709                 vector<int> processIDS;
710                 
711                 //loop through and create all the processes you want
712                 while (process != processors) {
713                         int pid = fork();
714                         
715                         if (pid > 0) {
716                                 processIDS.push_back(pid);  
717                                 process++;
718                         }else if (pid == 0){
719                                 
720                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
721                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
722                                         
723                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
724                                         obsDistance[i] = obsi;
725                                 
726                                         //calc Qav
727                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
728                                         
729                                         //get alpha
730                                         float alpha = decalc->getCoef(obsDistance[i], q);
731                                         
732                                         //find expected
733                                         vector<float> exp = decalc->calcExpected(q, alpha);
734                                         expectedDistance[i] = exp;
735                                         
736                                         //get de and deviation
737                                         float dei = decalc->calcDE(obsi, exp);
738                                         DE[i] = dei;
739                                         
740                                         it = trimmed[i].begin();
741                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
742                                         deviation[i] = dist;
743                                 }
744                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
745                                 
746                                 //write out data to file so parent can read it
747                                 ofstream out;
748                                 string s = toString(getpid()) + ".temp";
749                                 openOutputFile(s, out);
750                                 
751                                 int size = lines[process]->end - lines[process]->start;
752                                 out << size << endl;
753                                                                 
754                                 //output observed distances
755                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
756                                         out << obsDistance[i].size() << '\t';
757                                         for (int j = 0; j < obsDistance[i].size(); j++) {
758                                                 out << obsDistance[i][j] << '\t';
759                                         }
760                                         out << endl;
761                                 }
762                                 
763                                 
764                                 //output expected distances
765                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
766                                         out << expectedDistance[i].size() << '\t';
767                                         for (int j = 0; j < expectedDistance[i].size(); j++) {
768                                                 out << expectedDistance[i][j] << '\t';
769                                         }
770                                         out << endl;
771                                 }
772
773                         
774                                 //output de values
775                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
776                                         out << DE[i] << '\t';
777                                 }
778                                 out << endl;    
779                                 
780                                 //output de values
781                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
782                                         out << deviation[i] << '\t';
783                                 }
784                                 out << endl;
785                                 
786                                 out.close();
787
788                                 exit(0);
789                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
790                 }
791                 
792                 //force parent to wait until all the processes are done
793                 for (int i=0;i<processors;i++) { 
794                         int temp = processIDS[i];
795                         wait(&temp);
796                 }
797                 
798                 //get data created by processes
799                 for (int i=0;i<processors;i++) { 
800                         ifstream in;
801                         string s = toString(processIDS[i]) + ".temp";
802                         openInputFile(s, in);
803                         
804                         int size;
805                         in >> size;  gobble(in);
806                         
807                         //get observed distances
808                         int count = lines[i]->start;
809                         for (int m = 0; m < size; m++) {
810                                 int num;
811                                 in >> num;
812                         
813                                 vector<float> obs;  float w;
814                                 for (int j = 0; j < num; j++) {
815                                         in >> w;
816                                         obs.push_back(w);
817                                 }
818                         
819                                 obsDistance[count] = obs;
820                                 count++;
821                                 gobble(in);
822                         }
823                         
824                         gobble(in);
825                         
826                         //get expected distances
827                         count = lines[i]->start;
828                         for (int m = 0; m < size; m++) {
829                                 int num;
830                                 in >> num;
831                         
832                                 vector<float> exp;  float w;
833                                 for (int j = 0; j < num; j++) {
834                                         in >> w;
835                                         exp.push_back(w);
836                                 }
837                         
838                                 expectedDistance[count] = exp;
839                                 count++;
840                                 gobble(in);
841                         }
842
843                         gobble(in);
844                         
845                         count = lines[i]->start;
846                         for (int m = 0; m < size; m++) {
847                                 float num;
848                                 in >> num;
849                                 
850                                 DE[count] = num;
851                                 count++;
852                         }
853                         
854                         gobble(in);
855                         
856                         count = lines[i]->start;
857                         for (int m = 0; m < size; m++) {
858                                 float num;
859                                 in >> num;
860                                 
861                                 deviation[count] = num;
862                                 count++;
863                         }
864
865                         in.close();
866                         remove(s.c_str());
867                 }
868
869                                 
870 #else
871                         mothurOut("Calculating observed distance... "); cout.flush();
872                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
873                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
874                                 obsDistance[i] = obsi;
875                         }
876                         mothurOut("Done."); mothurOutEndLine();
877                         
878                         
879                         
880                         mothurOut("Finding variability... "); cout.flush();
881                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
882                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
883                                 Qav[i] = q;
884                         }
885                         mothurOut("Done."); mothurOutEndLine();
886                         
887                         
888                         
889                         mothurOut("Calculating alpha... "); cout.flush();
890                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
891                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
892                                 seqCoef.push_back(alpha);
893                         }
894                         mothurOut("Done."); mothurOutEndLine();
895                 
896                 
897                 
898                         mothurOut("Calculating expected distance... "); cout.flush();
899                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
900                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
901                                 expectedDistance[i] = exp;
902                         }
903                         mothurOut("Done."); mothurOutEndLine();
904                         
905                         
906                         
907                         mothurOut("Finding deviation... "); cout.flush();
908                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
909                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
910                                 DE[i] = de;
911                                 
912                                 it = trimmed[i].begin();
913                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
914                                 deviation[i] = dist;
915                         }
916                         mothurOut("Done."); mothurOutEndLine();
917
918 #endif          
919         }
920         catch(exception& e) {
921                 errorOut(e, "Pintail", "createProcesses");
922                 exit(1);
923         }
924 }
925
926
927 /**************************************************************************************************/
928
929 void Pintail::createProcessesQuan() {
930         try {
931 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
932                 int process = 0;
933                 vector<int> processIDS;
934                                 
935                 //loop through and create all the processes you want
936                 while (process != processors) {
937                         int pid = fork();
938                         
939                         if (pid > 0) {
940                                 processIDS.push_back(pid);  
941                                 process++;
942                         }else if (pid == 0){
943                                 
944                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end);
945                                 
946                                 //write out data to file so parent can read it
947                                 ofstream out;
948                                 string s = toString(getpid()) + ".temp";
949                                 openOutputFile(s, out);
950                                 
951                                                                 
952                                 //output observed distances
953                                 for (int i = 0; i < quantilesMembers.size(); i++) {
954                                         out << quantilesMembers[i].size() << '\t';
955                                         for (int j = 0; j < quantilesMembers[i].size(); j++) {
956                                                 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
957                                         }
958                                         out << endl;
959                                 }
960                                 
961                                 out.close();
962                                 
963                                 exit(0);
964                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
965                 }
966                 
967                 //force parent to wait until all the processes are done
968                 for (int i=0;i<processors;i++) { 
969                         int temp = processIDS[i];
970                         wait(&temp);
971                 }
972
973                 //get data created by processes
974                 for (int i=0;i<processors;i++) { 
975                         ifstream in;
976                         string s = toString(processIDS[i]) + ".temp";
977                         openInputFile(s, in);
978                         
979                         vector< vector<quanMember> > quan; 
980                         quan.resize(100);
981                         
982                         //get quantiles
983                         for (int m = 0; m < quan.size(); m++) {
984                                 int num;
985                                 in >> num; 
986                                 
987                                 gobble(in);
988
989                                 vector<quanMember> q;  float w; int b, n;
990                                 for (int j = 0; j < num; j++) {
991                                         in >> w >> b >> n;
992         //cout << w << '\t' << b << '\t' n << endl;
993                                         quanMember newMember(w, b, n);
994                                         q.push_back(newMember);
995                                 }
996 //cout << "here" << endl;
997                                 quan[m] = q;
998 //cout << "now here" << endl;
999                                 gobble(in);
1000                         }
1001                         
1002         
1003                         //save quan in quantiles
1004                         for (int j = 0; j < quan.size(); j++) {
1005                                 //put all values of q[i] into quan[i]
1006                                 for (int l = 0; l < quan[j].size(); l++) {  quantilesMembers[j].push_back(quan[j][l]);   }
1007                                 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
1008                         }
1009                                         
1010                         in.close();
1011                         remove(s.c_str());
1012                 }
1013                 
1014 #else
1015                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
1016 #endif          
1017         }
1018         catch(exception& e) {
1019                 errorOut(e, "Pintail", "createProcessesQuan");
1020                 exit(1);
1021         }
1022 }
1023
1024
1025 //***************************************************************************************************************
1026
1027