]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
fixed bug with aligner in database class, worked on chimeras
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12
13 //***************************************************************************************************************
14
15 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
16 //***************************************************************************************************************
17
18 Pintail::~Pintail() {
19         try {
20                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
21                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
22         }
23         catch(exception& e) {
24                 errorOut(e, "Pintail", "~Pintail");
25                 exit(1);
26         }
27 }       
28 //***************************************************************************************************************
29 void Pintail::print(ostream& out) {
30         try {
31                 
32                 for (int i = 0; i < querySeqs.size(); i++) {
33                         
34                         int index = ceil(deviation[i]);
35                         
36                         //is your DE value higher than the 95%
37                         string chimera;
38                         if (DE[i] > quantiles[index][4])        {       chimera = "Yes";        }
39                         else                                                            {       chimera = "No";         }
40                         
41                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
42                         if (chimera == "Yes") {
43                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
44                         }
45                         out << "Observed\t";
46                         
47                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
48                         out << endl;
49                         
50                         out << "Expected\t";
51                         
52                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
53                         out << endl;
54                         
55                 }
56         }
57         catch(exception& e) {
58                 errorOut(e, "Pintail", "print");
59                 exit(1);
60         }
61 }
62
63 //***************************************************************************************************************
64 void Pintail::getChimeras() {
65         try {
66                 
67                 //read in query sequences and subject sequences
68                 mothurOut("Reading sequences and template file... "); cout.flush();
69                 querySeqs = readSeqs(fastafile);
70                 templateSeqs = readSeqs(templateFile);
71                 mothurOut("Done."); mothurOutEndLine();
72                 
73                 int numSeqs = querySeqs.size();
74                 
75                 obsDistance.resize(numSeqs);
76                 expectedDistance.resize(numSeqs);
77                 seqCoef.resize(numSeqs);
78                 DE.resize(numSeqs);
79                 Qav.resize(numSeqs);
80                 bestfit.resize(numSeqs);
81                 deviation.resize(numSeqs);
82                 trimmed.resize(numSeqs);
83                 windowSizes.resize(numSeqs, window);
84                 windowSizesTemplate.resize(templateSeqs.size(), window);
85                 windowsForeachQuery.resize(numSeqs);
86                 h.resize(numSeqs);
87                 quantiles.resize(100);  //one for every percent mismatch
88                 
89                 //break up file if needed
90                 int linesPerProcess = numSeqs / processors ;
91                 
92                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
93                         //find breakup of sequences for all times we will Parallelize
94                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
95                         else {
96                                 //fill line pairs
97                                 for (int i = 0; i < (processors-1); i++) {                      
98                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
99                                 }
100                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
101                                 int i = processors - 1;
102                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
103                         }
104                         
105                         //find breakup of templatefile for quantiles
106                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
107                         else { 
108                                 for (int i = 0; i < processors; i++) {
109                                         templateLines.push_back(new linePair());
110                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
111                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
112                                 }
113                         }
114                 #else
115                         lines.push_back(new linePair(0, numSeqs));
116                         templateLines.push_back(new linePair(0, templateSeqs.size()));
117                 #endif
118                 
119                 distcalculator = new ignoreGaps();
120                 decalc = new DeCalculator();
121                 
122                 decalc->setMask(seqMask);
123                 
124                 //mask querys
125                 for (int i = 0; i < querySeqs.size(); i++) {
126                         decalc->runMask(querySeqs[i]);
127                 }
128                 
129                 //mask templates
130                 for (int i = 0; i < templateSeqs.size(); i++) {
131                         decalc->runMask(templateSeqs[i]);
132                 }
133                 
134 for (int i = 0; i < lines.size(); i++) { cout << "line pair " << i << " = " << lines[i]->start << '\t' << lines[i]->end << endl;  }
135                                 
136                 if (processors == 1) { 
137                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
138                         bestfit = findPairs(lines[0]->start, lines[0]->end);
139                         
140                         //ex.align matches from wigeon
141 for (int m = 0; m < templateSeqs.size(); m++)  {
142         if (templateSeqs[m]->getName() == "159481") {  bestfit[17] = *(templateSeqs[m]); }
143         if (templateSeqs[m]->getName() == "100137") {  bestfit[16] = *(templateSeqs[m]); }
144         if (templateSeqs[m]->getName() == "112956") {  bestfit[15] = *(templateSeqs[m]); }
145         if (templateSeqs[m]->getName() == "102326") {  bestfit[14] = *(templateSeqs[m]); }
146         if (templateSeqs[m]->getName() == "66229") {  bestfit[13] = *(templateSeqs[m]); }
147         if (templateSeqs[m]->getName() == "206276") {  bestfit[12] = *(templateSeqs[m]); }
148     if (templateSeqs[m]->getName() == "63607") {  bestfit[11] = *(templateSeqs[m]); }
149         if (templateSeqs[m]->getName() == "7056") {  bestfit[10] = *(templateSeqs[m]); }
150         if (templateSeqs[m]->getName() == "7088") {  bestfit[9] = *(templateSeqs[m]); }
151         if (templateSeqs[m]->getName() == "17553") {  bestfit[8] = *(templateSeqs[m]); }
152         if (templateSeqs[m]->getName() == "131723") {  bestfit[7] = *(templateSeqs[m]); }
153         if (templateSeqs[m]->getName() == "69013") {  bestfit[6] = *(templateSeqs[m]); }
154         if (templateSeqs[m]->getName() == "24543") {  bestfit[5] = *(templateSeqs[m]); }
155         if (templateSeqs[m]->getName() == "27824") {  bestfit[4] = *(templateSeqs[m]); }
156         if (templateSeqs[m]->getName() == "1456") {  bestfit[3] = *(templateSeqs[m]); }
157         if (templateSeqs[m]->getName() == "1456") {  bestfit[2] = *(templateSeqs[m]); }
158         if (templateSeqs[m]->getName() == "141312") {  bestfit[1] = *(templateSeqs[m]); }
159         if (templateSeqs[m]->getName() == "141312") {  bestfit[0] = *(templateSeqs[m]); }
160
161
162 }
163                         
164                         for (int j = 0; j < bestfit.size(); j++) { 
165                                 //chops off beginning and end of sequences so they both start and end with a base
166                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
167                         }
168                         mothurOut("Done."); mothurOutEndLine();
169                         
170                         mothurOut("Finding window breaks... "); cout.flush();
171                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
172                                 it = trimmed[i].begin();
173 //cout << "trimmed = " << it->first << '\t' << it->second << endl;
174                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
175                                 windowsForeachQuery[i] = win;
176                         }
177                         mothurOut("Done."); mothurOutEndLine();
178                 
179                 }else {         createProcessesSpots();         }
180
181                 //find P
182                 mothurOut("Getting conservation... "); cout.flush();
183                 if (consfile == "") { 
184                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the quantiles to a .prob file so that you can input them using the conservation parameter next time you run this command.  Providing the .prob file will dramatically improve speed.    "); cout.flush();
185                         probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); 
186                         mothurOut("Done."); mothurOutEndLine();
187                 }else                           {   probabilityProfile = readFreq();                      }
188                 
189                 //make P into Q
190                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];      }
191                 mothurOut("Done."); mothurOutEndLine();
192                 
193                 if (processors == 1) { 
194                                                 
195                         mothurOut("Calculating observed distance... "); cout.flush();
196                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
197         //cout << querySeqs[i]->getName() << '\t' << bestfit[i].getName() << " windows = " << windowsForeachQuery[i].size() << " size = " << windowSizes[i] << endl;
198                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
199                                 obsDistance[i] = obsi;
200                         }
201                         mothurOut("Done."); mothurOutEndLine();
202                         
203                         
204                         mothurOut("Finding variability... "); cout.flush();
205                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
206                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
207
208                                 Qav[i] = q;
209 //cout << i+1 << endl;
210 //for (int j = 0; j < Qav[i].size(); j++) {
211         //cout << Qav[i][j] << '\t';
212 //}
213 //cout << endl << endl;
214
215                         }
216                         mothurOut("Done."); mothurOutEndLine();
217                         
218                         
219                         mothurOut("Calculating alpha... "); cout.flush();
220                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
221                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
222 //cout << i+1 << "\tcoef = " << alpha << endl;
223                                 seqCoef[i] = alpha;
224                         }
225                         mothurOut("Done."); mothurOutEndLine();
226                 
227                 
228                         mothurOut("Calculating expected distance... "); cout.flush();
229                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
230                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
231                                 expectedDistance[i] = exp;
232                         }
233                         mothurOut("Done."); mothurOutEndLine();
234                         
235                         
236                         mothurOut("Finding deviation... "); cout.flush();
237                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
238                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
239                                 DE[i] = de;
240                                 
241                                 it = trimmed[i].begin();
242                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
243                                 deviation[i] = dist;
244                         }
245                         mothurOut("Done."); mothurOutEndLine();
246                         
247                 } 
248                 else {          createProcesses();              }
249                 
250                 
251                 //quantiles are used to determine whether the de values found indicate a chimera
252                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
253                 //combination of sequences in the template
254                 if (quanfile != "") {  quantiles =  readQuantiles();  }
255                 else {
256                         
257                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
258                         if (processors == 1) { 
259                                 quantiles = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
260                         }else {         createProcessesQuan();          }
261                         
262                         ofstream out4;
263                         string o = getRootName(templateFile) + "quan";
264                         
265                         openOutputFile(o, out4);
266                         
267                         //adjust quantiles
268                         for (int i = 0; i < quantiles.size(); i++) {
269                                 if (quantiles[i].size() == 0) {
270                                         //in case this is not a distance found in your template files
271                                         for (int g = 0; g < 6; g++) {
272                                                 quantiles[i].push_back(0.0);
273                                         }
274                                 }else{
275                                         
276                                         sort(quantiles[i].begin(), quantiles[i].end());
277                                         
278                                         vector<float> temp;
279                                         //save 10%
280                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.10)]);
281                                         //save 25%
282                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.25)]);
283                                         //save 50%
284                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.5)]);
285                                         //save 75%
286                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.75)]);
287                                         //save 95%
288                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.95)]);
289                                         //save 99%
290                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.99)]);
291                                         
292                                         quantiles[i] = temp;
293                                 }
294                                 
295                                 //output quan value
296                                 out4 << i+1 << '\t';                            
297                                 for (int u = 0; u < quantiles[i].size(); u++) {   out4 << quantiles[i][u] << '\t'; }
298                                 out4 << endl;
299
300                         }
301                         
302                         mothurOut("Done."); mothurOutEndLine();
303                 }
304         
305                 //free memory
306                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
307                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
308                         
309                 delete distcalculator;
310                 delete decalc;
311         }
312         catch(exception& e) {
313                 errorOut(e, "Pintail", "getChimeras");
314                 exit(1);
315         }
316 }
317
318 //***************************************************************************************************************
319
320 vector<float> Pintail::readFreq() {
321         try {
322         
323                 ifstream in;
324                 openInputFile(consfile, in);
325                 
326                 vector<float> prob;
327                 
328                 //read in probabilities and store in vector
329                 int pos; float num; 
330                 
331                 while(!in.eof()){
332                         
333                         in >> pos >> num;
334                         
335                         //do you want this spot
336                         prob.push_back(num);  
337                         
338                         gobble(in);
339                 }
340                 
341                 in.close();
342                 return prob;
343                 
344         }
345         catch(exception& e) {
346                 errorOut(e, "Pintail", "readFreq");
347                 exit(1);
348         }
349 }
350
351 //***************************************************************************************************************
352
353 vector< vector<float> > Pintail::readQuantiles() {
354         try {
355         
356                 ifstream in;
357                 openInputFile(quanfile, in);
358                 
359                 vector< vector<float> > quan;
360         
361                 int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
362                 
363                 while(!in.eof()){
364                         
365                         in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
366                         
367                         vector <float> temp;
368                         
369                         temp.push_back(ten); 
370                         temp.push_back(twentyfive);
371                         temp.push_back(fifty);
372                         temp.push_back(seventyfive);
373                         temp.push_back(ninetyfive);
374                         temp.push_back(ninetynine);
375                         
376                         quan.push_back(temp);  
377                         
378                         gobble(in);
379                 }
380                 
381                 in.close();
382                 return quan;
383                 
384         }
385         catch(exception& e) {
386                 errorOut(e, "Pintail", "readQuantiles");
387                 exit(1);
388         }
389 }
390 //***************************************************************************************************************
391 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
392 vector<Sequence> Pintail::findPairs(int start, int end) {
393         try {
394                 
395                 vector<Sequence> seqsMatches;  
396                 
397                 for(int i = start; i < end; i++){
398                 
399                         float smallest = 10000.0;
400                         Sequence query = *(querySeqs[i]);
401                         Sequence match;
402                         
403                         for(int j = 0; j < templateSeqs.size(); j++){
404                                 
405                                 Sequence temp = *(templateSeqs[j]);
406                                 
407                                 distcalculator->calcDist(query, temp);
408                                 float dist = distcalculator->getDist();
409                                 
410                                 if (dist < smallest) { 
411                                         match = *(templateSeqs[j]);
412                                         smallest = dist;
413                                 }
414                         }
415                         
416                         seqsMatches.push_back(match);
417                 }
418                 
419                 return seqsMatches;
420         
421         }
422         catch(exception& e) {
423                 errorOut(e, "Pintail", "findPairs");
424                 exit(1);
425         }
426 }
427
428 /**************************************************************************************************/
429
430 void Pintail::createProcessesSpots() {
431         try {
432 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
433                 int process = 0;
434                 vector<int> processIDS;
435                 
436                 //loop through and create all the processes you want
437                 while (process != processors) {
438                         int pid = fork();
439                         
440                         if (pid > 0) {
441                                 processIDS.push_back(pid);  
442                                 process++;
443                         }else if (pid == 0){
444                                 
445                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
446                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
447                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
448                                 
449                                 int count = lines[process]->start;
450                                 for (int j = 0; j < bestfit.size(); j++) {
451                                 
452                                         //chops off beginning and end of sequences so they both start and end with a base
453                                         map<int, int> trim;
454                                         decalc->trimSeqs(querySeqs[count], bestfit[j], trim); 
455                                         trimmed[count] = trim;
456                                         
457                                         count++;
458                                 }
459
460                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
461                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
462                                         it = trimmed[i].begin();
463                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
464                                 }
465                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
466                                 
467                                 //write out data to file so parent can read it
468                                 ofstream out;
469                                 string s = toString(pid) + ".temp";
470                                 openOutputFile(s, out);
471                                 
472                                 //output range and size
473                                 out << bestfit.size() << endl;
474                                 
475                                 //output pairs
476                                 for (int i = 0; i < bestfit.size(); i++) {
477                                         out << ">" << bestfit[i].getName() << endl << bestfit[i].getAligned() << endl;
478                                 }
479                                 
480                                 //output windowsForeachQuery
481                                 for (int i = 0; i < windowsForeachQuery.size(); i++) {
482                                         out << windowsForeachQuery[i].size() << '\t';
483                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
484                                                 out << windowsForeachQuery[i][j] << '\t';
485                                         }
486                                         out << endl;
487                                 }
488                                 
489                                 //output windowSizes
490                                 for (int i = 0; i < windowSizes.size(); i++) {
491                                         out << windowSizes[i] << '\t';
492                                 }
493                                 out << endl;                    
494                                 out.close();
495                                 
496                                 exit(0);
497                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
498                 }
499                 
500                 //force parent to wait until all the processes are done
501                 for (int i=0;i<processors;i++) { 
502                         int temp = processIDS[i];
503                         wait(&temp);
504                 }
505                 
506                 //get data created by processes
507                 for (int i=0;i<processors;i++) { 
508                         ifstream in;
509                         string s = toString(processIDS[i]) + ".temp";
510                         openInputFile(s, in);
511                         
512                         int size;
513                         in >> size;  gobble(in);
514                         
515                         //get pairs
516                         int count = lines[i]->start;
517                         for (int m = 0; m < size; m++) {
518                                 Sequence temp(in);
519                                 bestfit[count] = temp;
520                                 
521                                 count++;
522                                 gobble(in);
523                         }
524                                 
525                         gobble(in);
526                         
527                         count = lines[i]->start;
528                         for (int m = 0; m < size; m++) {
529                                 int num;
530                                 in >> num;
531                                 
532                                 vector<int> win;  int w;
533                                 for (int j = 0; j < num; j++) {
534                                         in >> w;
535                                         win.push_back(w);
536                                 }
537                                 
538                                 windowsForeachQuery[count] = win;
539                                 count++;
540                                 gobble(in);
541                         }
542                         
543                         gobble(in);
544                         count = lines[i]->start;
545                         for (int i = 0; i < size; i++) {
546                                 int num;
547                                 in >> num;
548                                 
549                                 windowSizes[count] = num;
550                                 count++;
551                         }
552                         
553                         in.close();
554                 }
555                         
556                 
557 #else
558                 bestfit = findPairs(lines[0]->start, lines[0]->end);
559                 for (int j = 0; j < bestfit.size(); j++) {
560                                 //chops off beginning and end of sequences so they both start and end with a base
561                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
562                 }
563
564                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
565                                 it = trimmed[i].begin();
566                                 map<int, int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
567                                 windows[i] = win;
568                 }
569
570 #endif          
571         }
572         catch(exception& e) {
573                 errorOut(e, "Pintail", "createProcessesSpots");
574                 exit(1);
575         }
576 }
577
578
579 /**************************************************************************************************/
580
581 void Pintail::createProcesses() {
582         try {
583 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
584                 int process = 0;
585                 vector<int> processIDS;
586                 
587                 vector< vector<float> > exp;  exp.resize(querySeqs.size());
588                 vector<float> de; de.resize(querySeqs.size());
589                 vector< vector<float> > obs; obs.resize(querySeqs.size());
590                 vector<float> dev; dev.resize(querySeqs.size());
591                 
592                 
593                 //loop through and create all the processes you want
594                 while (process != processors) {
595                         int pid = fork();
596                         
597                         if (pid > 0) {
598                                 processIDS.push_back(pid);  
599                                 process++;
600                         }else if (pid == 0){
601                                 
602                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
603                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
604                                         
605                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
606                                         obs[i] = obsi;
607                                 
608                                         //calc Qav
609                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
610                                         
611                                         //get alpha
612                                         float alpha = decalc->getCoef(obsDistance[i], q);
613                                         
614                                         //find expected
615                                         vector<float> exp = decalc->calcExpected(q, alpha);
616                                         expectedDistance[i] = exp;
617                                         
618                                         //get de and deviation
619                                         float dei = decalc->calcDE(obsi, exp);
620                                         de[i] = dei;
621                                         
622                                         it = trimmed[i].begin();
623                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
624                                         dev[i] = dist;
625                                 }
626                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
627                                 
628                                 exit(0);
629                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
630                 }
631                 
632                 //force parent to wait until all the processes are done
633                 for (int i=0;i<processors;i++) { 
634                         int temp = processIDS[i];
635                         wait(&temp);
636                 }
637                 
638                 obsDistance = obs;
639                 expectedDistance = exp;
640                 DE = de;
641                 deviation = dev;
642                 
643 #else
644                         mothurOut("Calculating observed distance... "); cout.flush();
645                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
646                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windows[i], windowSizes[i]);
647                                 obsDistance[i] = obsi;
648                         }
649                         mothurOut("Done."); mothurOutEndLine();
650                         
651                         
652                         
653                         mothurOut("Finding variability... "); cout.flush();
654                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
655                                 vector<float> q = decalc->findQav(windows[i], windowSizes[i], probabilityProfile, h[i]);
656                                 Qav[i] = q;
657                         }
658                         mothurOut("Done."); mothurOutEndLine();
659                         
660                         
661                         
662                         mothurOut("Calculating alpha... "); cout.flush();
663                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
664                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
665                                 seqCoef.push_back(alpha);
666                         }
667                         mothurOut("Done."); mothurOutEndLine();
668                 
669                 
670                 
671                         mothurOut("Calculating expected distance... "); cout.flush();
672                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
673                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
674                                 expectedDistance[i] = exp;
675                         }
676                         mothurOut("Done."); mothurOutEndLine();
677                         
678                         
679                         
680                         mothurOut("Finding deviation... "); cout.flush();
681                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
682                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
683                                 DE[i] = de;
684                                 
685                                 it = trimmed[i].begin();
686                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
687                                 deviation[i] = dist;
688                         }
689                         mothurOut("Done."); mothurOutEndLine();
690
691 #endif          
692         }
693         catch(exception& e) {
694                 errorOut(e, "Pintail", "createProcesses");
695                 exit(1);
696         }
697 }
698
699
700 /**************************************************************************************************/
701
702 void Pintail::createProcessesQuan() {
703         try {
704 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
705                 int process = 0;
706                 vector<int> processIDS;
707                 vector< vector<float> > quan; quan.resize(100);
708                                 
709                 //loop through and create all the processes you want
710                 while (process != processors) {
711                         int pid = fork();
712                         
713                         if (pid > 0) {
714                                 processIDS.push_back(pid);  
715                                 process++;
716                         }else if (pid == 0){
717                                 
718                                 vector< vector<float> > q = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end);
719                                 
720                                 for (int i = 0; i < q.size(); i++) {
721                                         //put all values of q[i] into quan[i]
722                                         quan[i].insert(quan[i].begin(), q[i].begin(), q[i].end());
723                                 }
724                                 
725                                 for (int i = 0; i < quan.size(); i++) {
726                                         cout << i+1 << '\t';
727                                         for (int j = 0; j < quan[i].size(); j++) {  cout << quan[i][j] << '\t';  }
728                                         cout << endl;
729                                 }
730
731                                 exit(0);
732                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
733                 }
734                 
735                 //force parent to wait until all the processes are done
736                 for (int i=0;i<processors;i++) { 
737                         int temp = processIDS[i];
738                         wait(&temp);
739                 }
740                 
741                 quantiles = quan;
742 #else
743                 quantiles = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
744 #endif          
745         }
746         catch(exception& e) {
747                 errorOut(e, "Pintail", "createProcessesQuan");
748                 exit(1);
749         }
750 }
751
752
753 //***************************************************************************************************************
754
755