]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
removing mallard
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12
13 //***************************************************************************************************************
14
15 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
16 //***************************************************************************************************************
17
18 Pintail::~Pintail() {
19         try {
20                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
21                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
22                 for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
23         }
24         catch(exception& e) {
25                 errorOut(e, "Pintail", "~Pintail");
26                 exit(1);
27         }
28 }       
29 //***************************************************************************************************************
30 void Pintail::print(ostream& out) {
31         try {
32                 
33                 mothurOutEndLine();
34                 
35                 for (int i = 0; i < querySeqs.size(); i++) {
36                         
37                         int index = ceil(deviation[i]);
38                         
39                         //is your DE value higher than the 95%
40                         string chimera;
41                         if (DE[i] > quantiles[index][4])                {       chimera = "Yes";        }
42                         else                                                                    {       chimera = "No";         }
43                         
44                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
45                         if (chimera == "Yes") {
46                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
47                         }
48                         out << "Observed\t";
49                         
50                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
51                         out << endl;
52                         
53                         out << "Expected\t";
54                         
55                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
56                         out << endl;
57                         
58                 }
59         }
60         catch(exception& e) {
61                 errorOut(e, "Pintail", "print");
62                 exit(1);
63         }
64 }
65
66 //***************************************************************************************************************
67 void Pintail::getChimeras() {
68         try {
69                 
70                 //read in query sequences and subject sequences
71                 mothurOut("Reading sequences and template file... "); cout.flush();
72                 querySeqs = readSeqs(fastafile);
73                 templateSeqs = readSeqs(templateFile);
74                 mothurOut("Done."); mothurOutEndLine();
75                 
76                 int numSeqs = querySeqs.size();
77                 
78                 obsDistance.resize(numSeqs);
79                 expectedDistance.resize(numSeqs);
80                 seqCoef.resize(numSeqs);
81                 DE.resize(numSeqs);
82                 Qav.resize(numSeqs);
83                 bestfit.resize(numSeqs);
84                 deviation.resize(numSeqs);
85                 trimmed.resize(numSeqs);
86                 windowSizes.resize(numSeqs, window);
87                 windowSizesTemplate.resize(templateSeqs.size(), window);
88                 windowsForeachQuery.resize(numSeqs);
89                 h.resize(numSeqs);
90                 quantiles.resize(100);  //one for every percent mismatch
91                 quantilesMembers.resize(100);  //one for every percent mismatch
92                 makeCompliant.resize(templateSeqs.size(), 0.0);
93                 
94                 //break up file if needed
95                 int linesPerProcess = numSeqs / processors ;
96                 
97                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
98                         //find breakup of sequences for all times we will Parallelize
99                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
100                         else {
101                                 //fill line pairs
102                                 for (int i = 0; i < (processors-1); i++) {                      
103                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
104                                 }
105                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
106                                 int i = processors - 1;
107                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
108                         }
109                         
110                         //find breakup of templatefile for quantiles
111                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
112                         else { 
113                                 for (int i = 0; i < processors; i++) {
114                                         templateLines.push_back(new linePair());
115                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
116                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
117                                 }
118                         }
119                 #else
120                         lines.push_back(new linePair(0, numSeqs));
121                         templateLines.push_back(new linePair(0, templateSeqs.size()));
122                 #endif
123                 
124                 distcalculator = new ignoreGaps();
125                 decalc = new DeCalculator();
126                 
127                 //if the user does enter a mask then you want to keep all the spots in the alignment
128                 if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
129                 else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
130                 
131                 decalc->setMask(seqMask);
132                 
133                 //find pairs
134                 if (processors == 1) { 
135                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
136                         bestfit = findPairs(lines[0]->start, lines[0]->end);
137                         mothurOut("Done."); mothurOutEndLine();
138                 }else {         createProcessesPairs();         }
139                 
140                 
141                 //find P
142                 mothurOut("Getting conservation... "); cout.flush();
143                 if (consfile == "") { 
144                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
145                         probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); 
146                         mothurOut("Done."); mothurOutEndLine();
147                 }else                           {   probabilityProfile = readFreq();                      }
148
149                 //make P into Q
150                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
151                 mothurOut("Done."); mothurOutEndLine();
152                 
153                 //mask sequences if the user wants to 
154                 if (seqMask != "") {
155                         //mask querys
156                         for (int i = 0; i < querySeqs.size(); i++) {
157                                 decalc->runMask(querySeqs[i]);
158                         }
159                 
160                         //mask templates
161                         for (int i = 0; i < templateSeqs.size(); i++) {
162                                 decalc->runMask(templateSeqs[i]);
163                         }
164                         
165                         for (int i = 0; i < bestfit.size(); i++) { 
166                                 decalc->runMask(bestfit[i]);
167                         }
168
169                 }
170                                                 
171                 if (processors == 1) { 
172         
173                         for (int j = 0; j < bestfit.size(); j++) { 
174                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
175                         }
176                         
177                         mothurOut("Finding window breaks... "); cout.flush();
178                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
179                                 it = trimmed[i].begin();
180                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
181                                 windowsForeachQuery[i] = win;
182                         }
183                         mothurOut("Done."); mothurOutEndLine();
184                 
185                 }else {         createProcessesSpots();         }
186                 
187                 if (processors == 1) { 
188                                                 
189                         mothurOut("Calculating observed distance... "); cout.flush();
190                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
191                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
192         
193                                 obsDistance[i] = obsi;
194                         }
195                         mothurOut("Done."); mothurOutEndLine();
196                         
197                         
198                         mothurOut("Finding variability... "); cout.flush();
199                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
200                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
201
202                                 Qav[i] = q;
203                         }
204                         mothurOut("Done."); mothurOutEndLine();
205                         
206                         
207                         mothurOut("Calculating alpha... "); cout.flush();
208                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
209                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
210                                 seqCoef[i] = alpha;
211                         }
212                         mothurOut("Done."); mothurOutEndLine();
213                 
214                 
215                         mothurOut("Calculating expected distance... "); cout.flush();
216                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
217                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
218                                 expectedDistance[i] = exp;
219                         }
220                         mothurOut("Done."); mothurOutEndLine();
221                         
222                         
223                         mothurOut("Finding deviation... "); cout.flush();
224                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
225                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
226                                 DE[i] = de;
227                         
228                                 it = trimmed[i].begin();
229                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
230                                 deviation[i] = dist;
231                         }
232                         mothurOut("Done."); mothurOutEndLine();
233                         
234                 } 
235                 else {          createProcesses();              }
236                 
237                 //quantiles are used to determine whether the de values found indicate a chimera
238                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
239                 //combination of sequences in the template
240                 if (quanfile != "") {  quantiles =  readQuantiles();  }
241                 else {
242                         
243                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
244                         if (processors == 1) { 
245                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size(), makeCompliant);
246                         }else {         createProcessesQuan();          }
247                         
248                                                 
249                         //decided against this because we were having trouble setting the sensitivity... may want to revisit this...
250                         //quantiles = decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
251                         
252                         ofstream out4;
253                         string o;
254                         
255                         o = getRootName(templateFile) + "quan";
256                         
257                         openOutputFile(o, out4);
258                         
259                         //adjust quantiles
260                         for (int i = 0; i < quantiles.size(); i++) {
261                                 vector<float> temp;
262                                 
263                                 if (quantiles[i].size() == 0) {
264                                         //in case this is not a distance found in your template files
265                                         for (int g = 0; g < 6; g++) {
266                                                 temp.push_back(0.0);
267                                         }
268                                 }else{
269                                         
270                                         sort(quantiles[i].begin(), quantiles[i].end());
271                                         
272                                         //save 10%
273                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.10)]);
274                                         //save 25%
275                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.25)]);
276                                         //save 50%
277                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.5)]);
278                                         //save 75%
279                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.75)]);
280                                         //save 95%
281                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.95)]);
282                                         //save 99%
283                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.99)]);
284                                         
285                                 }
286                                 
287                                 //output quan value
288                                 out4 << i+1 << '\t';                            
289                                 for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
290                                 out4 << endl;
291                                 
292                                 quantiles[i] = temp;
293                                 
294                         }
295                         
296                         out4.close();
297                         
298                         mothurOut("Done."); mothurOutEndLine();
299                         
300                 }
301                         
302                 //free memory
303                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
304                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
305                         
306                 delete distcalculator;
307                 delete decalc;
308         }
309         catch(exception& e) {
310                 errorOut(e, "Pintail", "getChimeras");
311                 exit(1);
312         }
313 }
314
315 //***************************************************************************************************************
316
317 vector<float> Pintail::readFreq() {
318         try {
319         
320                 ifstream in;
321                 openInputFile(consfile, in);
322                 
323                 vector<float> prob;
324                 set<int> h = decalc->getPos();  //positions of bases in masking sequence
325                 
326                 //read in probabilities and store in vector
327                 int pos; float num; 
328                 
329                 while(!in.eof()){
330                         
331                         in >> pos >> num;
332                         
333                         if (h.count(pos) > 0) {
334                                 float Pi;
335                                 Pi =  (num - 0.25) / 0.75; 
336                         
337                                 //cannot have probability less than 0.
338                                 if (Pi < 0) { Pi = 0.0; }
339
340                                 //do you want this spot
341                                 prob.push_back(Pi);  
342                         }
343                         
344                         gobble(in);
345                 }
346                 
347                 in.close();
348                 return prob;
349                 
350         }
351         catch(exception& e) {
352                 errorOut(e, "Pintail", "readFreq");
353                 exit(1);
354         }
355 }
356
357 //***************************************************************************************************************
358
359 vector< vector<float> > Pintail::readQuantiles() {
360         try {
361         
362                 ifstream in;
363                 openInputFile(quanfile, in);
364                 
365                 vector< vector<float> > quan;
366         
367                 int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
368                 
369                 while(!in.eof()){
370                         
371                         in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
372                         
373                         vector <float> temp;
374                         
375                         temp.push_back(ten); 
376                         temp.push_back(twentyfive);
377                         temp.push_back(fifty);
378                         temp.push_back(seventyfive);
379                         temp.push_back(ninetyfive);
380                         temp.push_back(ninetynine);
381                         
382                         quan.push_back(temp);  
383                         
384                         gobble(in);
385                 }
386                 
387                 in.close();
388                 return quan;
389                 
390         }
391         catch(exception& e) {
392                 errorOut(e, "Pintail", "readQuantiles");
393                 exit(1);
394         }
395 }
396 //***************************************************************************************************************
397 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
398 vector<Sequence*> Pintail::findPairs(int start, int end) {
399         try {
400                 
401                 vector<Sequence*> seqsMatches;  
402                 
403                 for(int i = start; i < end; i++){
404                 
405                         float smallest = 10000.0;
406                         Sequence query = *(querySeqs[i]);
407                         Sequence* match;
408                         
409                         for(int j = 0; j < templateSeqs.size(); j++){
410                                 
411                                 Sequence temp = *(templateSeqs[j]);
412                                 
413                                 distcalculator->calcDist(query, temp);
414                                 float dist = distcalculator->getDist();
415                                 
416                                 if (dist < smallest) { 
417                                         match = templateSeqs[j];
418                                         smallest = dist;
419                                 }
420                         }
421                         
422                         //make copy so trimSeqs doesn't overtrim
423                         Sequence* copy = new Sequence(match->getName(), match->getAligned());
424                         
425                         seqsMatches.push_back(copy);
426                 }
427                 
428                 return seqsMatches;
429         
430         }
431         catch(exception& e) {
432                 errorOut(e, "Pintail", "findPairs");
433                 exit(1);
434         }
435 }
436
437 /**************************************************************************************************/
438
439 void Pintail::createProcessesSpots() {
440         try {
441 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
442                 int process = 0;
443                 vector<int> processIDS;
444                 
445                 //loop through and create all the processes you want
446                 while (process != processors) {
447                         int pid = fork();
448                         
449                         if (pid > 0) {
450                                 processIDS.push_back(pid);  
451                                 process++;
452                         }else if (pid == 0){
453                                 
454                                 for (int j = lines[process]->start; j < lines[process]->end; j++) {
455                                 
456                                         //chops off beginning and end of sequences so they both start and end with a base
457                                         map<int, int> trim;
458
459                                         decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
460                                         trimmed[j] = trim;
461                                         
462                                 }
463
464                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
465                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
466                                         it = trimmed[i].begin();
467                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
468                                 }
469                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
470                                 
471                                 //write out data to file so parent can read it
472                                 ofstream out;
473                                 string s = toString(getpid()) + ".temp";
474                                 openOutputFile(s, out);
475                                 
476                                 //output windowsForeachQuery
477                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
478                                         out << windowsForeachQuery[i].size() << '\t';
479                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
480                                                 out << windowsForeachQuery[i][j] << '\t';
481                                         }
482                                         out << endl;
483                                 }
484                         
485                                 //output windowSizes
486                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
487                                         out << windowSizes[i] << '\t';
488                                 }
489                                 out << endl;
490                                 
491                                 //output trimmed values
492                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
493                                         it = trimmed[i].begin();                                        
494                                         out << it->first << '\t' << it->second << endl;
495                                 }
496                                 out.close();
497                                 
498                                 exit(0);
499                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
500                 }
501                 
502                 //force parent to wait until all the processes are done
503                 for (int i=0;i<processors;i++) { 
504                         int temp = processIDS[i];
505                         wait(&temp);
506                 }
507                 
508                 //get data created by processes
509                 for (int i=0;i<processors;i++) { 
510                         ifstream in;
511                         string s = toString(processIDS[i]) + ".temp";
512                         openInputFile(s, in);
513                         
514                         int size = lines[i]->end - lines[i]->start;
515                                         
516                         int count = lines[i]->start;
517                         for (int m = 0; m < size; m++) {
518                                 int num;
519                                 in >> num;
520                         
521                                 vector<int> win;  int w;
522                                 for (int j = 0; j < num; j++) {
523                                         in >> w;
524                                         win.push_back(w);
525                                 }
526                         
527                                 windowsForeachQuery[count] = win;
528                                 count++;
529                                 gobble(in);
530                         }
531                 
532                         gobble(in);
533                         count = lines[i]->start;
534                         for (int m = 0; m < size; m++) {
535                                 int num;
536                                 in >> num;
537                                 
538                                 windowSizes[count] = num;
539                                 count++;
540                         }
541                         
542                         gobble(in);
543                         
544                         count = lines[i]->start;
545                         for (int m = 0; m < size; m++) {
546                                 int front, back;
547                                 in >> front >> back;
548                         
549                                 map<int, int> t;
550                                 
551                                 t[front] = back;
552                                 
553                                 trimmed[count] = t;
554                                 count++;
555                                 
556                                 gobble(in);
557                         }
558
559                         
560                         in.close();
561                         remove(s.c_str());
562                 }
563                         
564         
565 #else
566                 for (int j = 0; j < bestfit.size(); j++) {
567                         //chops off beginning and end of sequences so they both start and end with a base
568                         decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
569                 }
570
571                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
572                                 it = trimmed[i].begin();
573                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
574                                 windowsForeachQuery[i] = win;
575                 }
576
577 #endif          
578         }
579         catch(exception& e) {
580                 errorOut(e, "Pintail", "createProcessesSpots");
581                 exit(1);
582         }
583 }
584 /**************************************************************************************************/
585
586 void Pintail::createProcessesPairs() {
587         try {
588 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
589                 int process = 0;
590                 vector<int> processIDS;
591                 
592                 //loop through and create all the processes you want
593                 while (process != processors) {
594                         int pid = fork();
595                         
596                         if (pid > 0) {
597                                 processIDS.push_back(pid);  
598                                 process++;
599                         }else if (pid == 0){
600                                 
601                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
602                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
603                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
604                                 
605                                 //write out data to file so parent can read it
606                                 ofstream out;
607                                 string s = toString(getpid()) + ".temp";
608                                 openOutputFile(s, out);
609                                 
610                                 //output range and size
611                                 out << bestfit.size() << endl;
612                                 
613                                 //output pairs
614                                 for (int i = 0; i < bestfit.size(); i++) {
615                                         out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
616                                 }
617                                 out.close();
618                                 
619                                 exit(0);
620                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
621                 }
622                 
623                 //force parent to wait until all the processes are done
624                 for (int i=0;i<processors;i++) { 
625                         int temp = processIDS[i];
626                         wait(&temp);
627                 }
628                 
629                 //get data created by processes
630                 for (int i=0;i<processors;i++) { 
631                         ifstream in;
632                         string s = toString(processIDS[i]) + ".temp";
633                         openInputFile(s, in);
634                         
635                         int size;
636                         in >> size;  gobble(in);
637                                 
638                         //get pairs
639                         int count = lines[i]->start;
640                         for (int m = 0; m < size; m++) {
641                                 Sequence* temp = new Sequence(in);
642                                 bestfit[count] = temp;
643                         
644                                 count++;
645                                 gobble(in);
646                         }
647                         
648                         in.close();
649                         remove(s.c_str());
650                 }
651                         
652         
653 #else
654                 bestfit = findPairs(lines[0]->start, lines[0]->end);
655 #endif          
656         }
657         catch(exception& e) {
658                 errorOut(e, "Pintail", "createProcessesPairs");
659                 exit(1);
660         }
661 }
662 /**************************************************************************************************/
663
664 void Pintail::createProcesses() {
665         try {
666 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
667                 int process = 0;
668                 vector<int> processIDS;
669                 
670                 //loop through and create all the processes you want
671                 while (process != processors) {
672                         int pid = fork();
673                         
674                         if (pid > 0) {
675                                 processIDS.push_back(pid);  
676                                 process++;
677                         }else if (pid == 0){
678                                 
679                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
680                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
681                                         
682                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
683                                         obsDistance[i] = obsi;
684                                 
685                                         //calc Qav
686                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
687                                         
688                                         //get alpha
689                                         float alpha = decalc->getCoef(obsDistance[i], q);
690                                         
691                                         //find expected
692                                         vector<float> exp = decalc->calcExpected(q, alpha);
693                                         expectedDistance[i] = exp;
694                                         
695                                         //get de and deviation
696                                         float dei = decalc->calcDE(obsi, exp);
697                                         DE[i] = dei;
698                                         
699                                         it = trimmed[i].begin();
700                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
701                                         deviation[i] = dist;
702                                 }
703                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
704                                 
705                                 //write out data to file so parent can read it
706                                 ofstream out;
707                                 string s = toString(getpid()) + ".temp";
708                                 openOutputFile(s, out);
709                                 
710                                 int size = lines[process]->end - lines[process]->start;
711                                 out << size << endl;
712                                                                 
713                                 //output observed distances
714                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
715                                         out << obsDistance[i].size() << '\t';
716                                         for (int j = 0; j < obsDistance[i].size(); j++) {
717                                                 out << obsDistance[i][j] << '\t';
718                                         }
719                                         out << endl;
720                                 }
721                                 
722                                 
723                                 //output expected distances
724                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
725                                         out << expectedDistance[i].size() << '\t';
726                                         for (int j = 0; j < expectedDistance[i].size(); j++) {
727                                                 out << expectedDistance[i][j] << '\t';
728                                         }
729                                         out << endl;
730                                 }
731
732                         
733                                 //output de values
734                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
735                                         out << DE[i] << '\t';
736                                 }
737                                 out << endl;    
738                                 
739                                 //output de values
740                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
741                                         out << deviation[i] << '\t';
742                                 }
743                                 out << endl;
744                                 
745                                 out.close();
746
747                                 exit(0);
748                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
749                 }
750                 
751                 //force parent to wait until all the processes are done
752                 for (int i=0;i<processors;i++) { 
753                         int temp = processIDS[i];
754                         wait(&temp);
755                 }
756                 
757                 //get data created by processes
758                 for (int i=0;i<processors;i++) { 
759                         ifstream in;
760                         string s = toString(processIDS[i]) + ".temp";
761                         openInputFile(s, in);
762                         
763                         int size;
764                         in >> size;  gobble(in);
765                         
766                         //get observed distances
767                         int count = lines[i]->start;
768                         for (int m = 0; m < size; m++) {
769                                 int num;
770                                 in >> num;
771                         
772                                 vector<float> obs;  float w;
773                                 for (int j = 0; j < num; j++) {
774                                         in >> w;
775                                         obs.push_back(w);
776                                 }
777                         
778                                 obsDistance[count] = obs;
779                                 count++;
780                                 gobble(in);
781                         }
782                         
783                         gobble(in);
784                         
785                         //get expected distances
786                         count = lines[i]->start;
787                         for (int m = 0; m < size; m++) {
788                                 int num;
789                                 in >> num;
790                         
791                                 vector<float> exp;  float w;
792                                 for (int j = 0; j < num; j++) {
793                                         in >> w;
794                                         exp.push_back(w);
795                                 }
796                         
797                                 expectedDistance[count] = exp;
798                                 count++;
799                                 gobble(in);
800                         }
801
802                         gobble(in);
803                         
804                         count = lines[i]->start;
805                         for (int m = 0; m < size; m++) {
806                                 float num;
807                                 in >> num;
808                                 
809                                 DE[count] = num;
810                                 count++;
811                         }
812                         
813                         gobble(in);
814                         
815                         count = lines[i]->start;
816                         for (int m = 0; m < size; m++) {
817                                 float num;
818                                 in >> num;
819                                 
820                                 deviation[count] = num;
821                                 count++;
822                         }
823
824                         in.close();
825                         remove(s.c_str());
826                 }
827
828                                 
829 #else
830                         mothurOut("Calculating observed distance... "); cout.flush();
831                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
832                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
833                                 obsDistance[i] = obsi;
834                         }
835                         mothurOut("Done."); mothurOutEndLine();
836                         
837                         
838                         
839                         mothurOut("Finding variability... "); cout.flush();
840                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
841                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
842                                 Qav[i] = q;
843                         }
844                         mothurOut("Done."); mothurOutEndLine();
845                         
846                         
847                         
848                         mothurOut("Calculating alpha... "); cout.flush();
849                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
850                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
851                                 seqCoef.push_back(alpha);
852                         }
853                         mothurOut("Done."); mothurOutEndLine();
854                 
855                 
856                 
857                         mothurOut("Calculating expected distance... "); cout.flush();
858                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
859                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
860                                 expectedDistance[i] = exp;
861                         }
862                         mothurOut("Done."); mothurOutEndLine();
863                         
864                         
865                         
866                         mothurOut("Finding deviation... "); cout.flush();
867                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
868                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
869                                 DE[i] = de;
870                                 
871                                 it = trimmed[i].begin();
872                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
873                                 deviation[i] = dist;
874                         }
875                         mothurOut("Done."); mothurOutEndLine();
876
877 #endif          
878         }
879         catch(exception& e) {
880                 errorOut(e, "Pintail", "createProcesses");
881                 exit(1);
882         }
883 }
884
885
886 /**************************************************************************************************/
887
888 void Pintail::createProcessesQuan() {
889         try {
890 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
891                 int process = 0;
892                 vector<int> processIDS;
893                                 
894                 //loop through and create all the processes you want
895                 while (process != processors) {
896                         int pid = fork();
897                         
898                         if (pid > 0) {
899                                 processIDS.push_back(pid);  
900                                 process++;
901                         }else if (pid == 0){
902                                 
903                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end, makeCompliant);
904                                 
905                                 //write out data to file so parent can read it
906                                 ofstream out;
907                                 string s = toString(getpid()) + ".temp";
908                                 openOutputFile(s, out);
909                                 
910                                                                 
911                                 //output observed distances
912                                 for (int i = 0; i < quantilesMembers.size(); i++) {
913                                         out << quantilesMembers[i].size() << '\t';
914                                         for (int j = 0; j < quantilesMembers[i].size(); j++) {
915                                                 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
916                                         }
917                                         out << endl;
918                                 }
919                                 
920                                 out.close();
921                                 
922                                 exit(0);
923                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
924                 }
925                 
926                 //force parent to wait until all the processes are done
927                 for (int i=0;i<processors;i++) { 
928                         int temp = processIDS[i];
929                         wait(&temp);
930                 }
931
932                 //get data created by processes
933                 for (int i=0;i<processors;i++) { 
934                         ifstream in;
935                         string s = toString(processIDS[i]) + ".temp";
936                         openInputFile(s, in);
937                         
938                         vector< vector<quanMember> > quan; 
939                         quan.resize(100);
940                         
941                         //get quantiles
942                         for (int m = 0; m < quan.size(); m++) {
943                                 int num;
944                                 in >> num; 
945                                 
946                                 gobble(in);
947
948                                 vector<quanMember> q;  float w; int b, n;
949                                 for (int j = 0; j < num; j++) {
950                                         in >> w >> b >> n;
951         //cout << w << '\t' << b << '\t' n << endl;
952                                         quanMember newMember(w, b, n);
953                                         q.push_back(newMember);
954                                 }
955 //cout << "here" << endl;
956                                 quan[m] = q;
957 //cout << "now here" << endl;
958                                 gobble(in);
959                         }
960                         
961         
962                         //save quan in quantiles
963                         for (int j = 0; j < quan.size(); j++) {
964                                 //put all values of q[i] into quan[i]
965                                 for (int l = 0; l < quan[j].size(); l++) {  quantilesMembers[j].push_back(quan[j][l]);   }
966                                 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
967                         }
968                                         
969                         in.close();
970                         remove(s.c_str());
971                 }
972                 
973 #else
974                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size(), makeCompliant);
975 #endif          
976         }
977         catch(exception& e) {
978                 errorOut(e, "Pintail", "createProcessesQuan");
979                 exit(1);
980         }
981 }
982
983
984 //***************************************************************************************************************
985
986