]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
chimera.seqs pintail is working.
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12
13 //***************************************************************************************************************
14
15 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
16 //***************************************************************************************************************
17
18 Pintail::~Pintail() {
19         try {
20                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
21                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
22                 for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
23         }
24         catch(exception& e) {
25                 errorOut(e, "Pintail", "~Pintail");
26                 exit(1);
27         }
28 }       
29 //***************************************************************************************************************
30 void Pintail::print(ostream& out) {
31         try {
32                 
33                 mothurOutEndLine();
34                 
35                 for (int i = 0; i < querySeqs.size(); i++) {
36                         
37                         int index = ceil(deviation[i]);
38                         
39                         //is your DE value higher than the 95%
40                         string chimera;
41                         if (DE[i] > quantiles[index][4])                {       chimera = "Yes";        }
42                         else                                                                    {       chimera = "No";         }
43                         
44                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
45                         if (chimera == "Yes") {
46                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
47                         }
48                         out << "Observed\t";
49                         
50                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
51                         out << endl;
52                         
53                         out << "Expected\t";
54                         
55                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
56                         out << endl;
57                         
58                 }
59         }
60         catch(exception& e) {
61                 errorOut(e, "Pintail", "print");
62                 exit(1);
63         }
64 }
65
66 //***************************************************************************************************************
67 void Pintail::getChimeras() {
68         try {
69                 
70                 //read in query sequences and subject sequences
71                 mothurOut("Reading sequences and template file... "); cout.flush();
72                 querySeqs = readSeqs(fastafile);
73                 templateSeqs = readSeqs(templateFile);
74                 mothurOut("Done."); mothurOutEndLine();
75                 
76                 int numSeqs = querySeqs.size();
77                 
78                 obsDistance.resize(numSeqs);
79                 expectedDistance.resize(numSeqs);
80                 seqCoef.resize(numSeqs);
81                 DE.resize(numSeqs);
82                 Qav.resize(numSeqs);
83                 bestfit.resize(numSeqs);
84                 deviation.resize(numSeqs);
85                 trimmed.resize(numSeqs);
86                 windowSizes.resize(numSeqs, window);
87                 windowSizesTemplate.resize(templateSeqs.size(), window);
88                 windowsForeachQuery.resize(numSeqs);
89                 h.resize(numSeqs);
90                 quantiles.resize(100);  //one for every percent mismatch
91                 quantilesMembers.resize(100);  //one for every percent mismatch
92                 
93                 //break up file if needed
94                 int linesPerProcess = numSeqs / processors ;
95                 
96                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
97                         //find breakup of sequences for all times we will Parallelize
98                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
99                         else {
100                                 //fill line pairs
101                                 for (int i = 0; i < (processors-1); i++) {                      
102                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
103                                 }
104                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
105                                 int i = processors - 1;
106                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
107                         }
108                         
109                         //find breakup of templatefile for quantiles
110                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
111                         else { 
112                                 for (int i = 0; i < processors; i++) {
113                                         templateLines.push_back(new linePair());
114                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
115                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
116                                 }
117                         }
118                 #else
119                         lines.push_back(new linePair(0, numSeqs));
120                         templateLines.push_back(new linePair(0, templateSeqs.size()));
121                 #endif
122                 
123                 distcalculator = new ignoreGaps();
124                 decalc = new DeCalculator();
125                 
126                 //if the user does enter a mask then you want to keep all the spots in the alignment
127                 if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
128                 else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
129                 
130                 decalc->setMask(seqMask);
131                 
132                 //find pairs
133                 if (processors == 1) { 
134                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
135                         bestfit = findPairs(lines[0]->start, lines[0]->end);
136                         mothurOut("Done."); mothurOutEndLine();
137                 }else {         createProcessesPairs();         }
138                 
139                 
140                 for (int j = 0; j < bestfit.size(); j++) { 
141                                 
142                                 //chops off beginning and end of sequences so they both start and end with a base
143                                 ofstream out;
144                                 string s = querySeqs[j]->getName();
145                         
146                                 openOutputFile(s, out);
147                                 out << ">" << querySeqs[j]->getName() << endl;
148                                 out << querySeqs[j]->getAligned() << endl;
149                                 out.close();
150                                 
151                                 string t =querySeqs[j]->getName() + ".ref";
152                                 openOutputFile(t, out);
153                                 out << ">" << bestfit[j]->getName() << endl;
154                                 out << bestfit[j]->getAligned() << endl;
155                                 out.close();    
156                 }
157
158                 
159                 //find P
160                 mothurOut("Getting conservation... "); cout.flush();
161                 if (consfile == "") { 
162                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
163                         probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); 
164                         mothurOut("Done."); mothurOutEndLine();
165                 }else                           {   probabilityProfile = readFreq();                      }
166
167                 //make P into Q
168                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
169                 mothurOut("Done."); mothurOutEndLine();
170                 
171                 //mask sequences if the user wants to 
172                 if (seqMask != "") {
173                         //mask querys
174                         for (int i = 0; i < querySeqs.size(); i++) {
175                                 decalc->runMask(querySeqs[i]);
176                         }
177                 
178                         //mask templates
179                         for (int i = 0; i < templateSeqs.size(); i++) {
180                                 decalc->runMask(templateSeqs[i]);
181                         }
182                         
183                         for (int i = 0; i < bestfit.size(); i++) { 
184                                 decalc->runMask(bestfit[i]);
185                         }
186
187                 }
188                                                 
189                 if (processors == 1) { 
190         
191                         for (int j = 0; j < bestfit.size(); j++) { 
192                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
193                         }
194                         
195                         mothurOut("Finding window breaks... "); cout.flush();
196                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
197                                 it = trimmed[i].begin();
198                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
199                                 windowsForeachQuery[i] = win;
200                         }
201                         mothurOut("Done."); mothurOutEndLine();
202                 
203                 }else {         createProcessesSpots();         }
204                 
205                 if (processors == 1) { 
206                                                 
207                         mothurOut("Calculating observed distance... "); cout.flush();
208                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
209                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
210         
211                                 obsDistance[i] = obsi;
212                         }
213                         mothurOut("Done."); mothurOutEndLine();
214                         
215                         
216                         mothurOut("Finding variability... "); cout.flush();
217                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
218                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
219
220                                 Qav[i] = q;
221                         }
222                         mothurOut("Done."); mothurOutEndLine();
223                         
224                         
225                         mothurOut("Calculating alpha... "); cout.flush();
226                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
227                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
228                                 seqCoef[i] = alpha;
229                         }
230                         mothurOut("Done."); mothurOutEndLine();
231                 
232                 
233                         mothurOut("Calculating expected distance... "); cout.flush();
234                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
235                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
236                                 expectedDistance[i] = exp;
237                         }
238                         mothurOut("Done."); mothurOutEndLine();
239                         
240                         
241                         mothurOut("Finding deviation... "); cout.flush();
242                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
243                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
244                                 DE[i] = de;
245                         
246                                 it = trimmed[i].begin();
247                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
248                                 deviation[i] = dist;
249                         }
250                         mothurOut("Done."); mothurOutEndLine();
251                         
252                 } 
253                 else {          createProcesses();              }
254                 
255                 //quantiles are used to determine whether the de values found indicate a chimera
256                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
257                 //combination of sequences in the template
258                 if (quanfile != "") {  quantiles =  readQuantiles();  }
259                 else {
260                         
261                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
262                         if (processors == 1) { 
263                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size(), makeCompliant);
264                         }else {         createProcessesQuan();          }
265                         
266                                                 
267                         //decided against this because we were having trouble setting the sensitivity... may want to revisit this...
268                         //quantiles = decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
269                         
270                         ofstream out4;
271                         string o;
272                         
273                         o = getRootName(templateFile) + "quan";
274                         
275                         openOutputFile(o, out4);
276                         
277                         //adjust quantiles
278                         for (int i = 0; i < quantiles.size(); i++) {
279                                 vector<float> temp;
280                                 
281                                 if (quantiles[i].size() == 0) {
282                                         //in case this is not a distance found in your template files
283                                         for (int g = 0; g < 6; g++) {
284                                                 temp.push_back(0.0);
285                                         }
286                                 }else{
287                                         
288                                         sort(quantiles[i].begin(), quantiles[i].end());
289                                         
290                                         //save 10%
291                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.10)]);
292                                         //save 25%
293                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.25)]);
294                                         //save 50%
295                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.5)]);
296                                         //save 75%
297                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.75)]);
298                                         //save 95%
299                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.95)]);
300                                         //save 99%
301                                         temp.push_back(quantiles[i][int(quantiles[i].size() * 0.99)]);
302                                         
303                                 }
304                                 
305                                 //output quan value
306                                 out4 << i+1 << '\t';                            
307                                 for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
308                                 out4 << endl;
309                                 
310                                 quantiles[i] = temp;
311                                 
312                         }
313                         
314                         out4.close();
315                         
316                         mothurOut("Done."); mothurOutEndLine();
317                         
318                 }
319                         
320                 //free memory
321                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
322                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
323                         
324                 delete distcalculator;
325                 delete decalc;
326         }
327         catch(exception& e) {
328                 errorOut(e, "Pintail", "getChimeras");
329                 exit(1);
330         }
331 }
332
333 //***************************************************************************************************************
334
335 vector<float> Pintail::readFreq() {
336         try {
337         
338                 ifstream in;
339                 openInputFile(consfile, in);
340                 
341                 vector<float> prob;
342                 set<int> h = decalc->getPos();  //positions of bases in masking sequence
343                 
344                 //read in probabilities and store in vector
345                 int pos; float num; 
346                 
347                 while(!in.eof()){
348                         
349                         in >> pos >> num;
350                         
351                         if (h.count(pos) > 0) {
352                                 float Pi;
353                                 Pi =  (num - 0.25) / 0.75; 
354                         
355                                 //cannot have probability less than 0.
356                                 if (Pi < 0) { Pi = 0.0; }
357
358                                 //do you want this spot
359                                 prob.push_back(Pi);  
360                         }
361                         
362                         gobble(in);
363                 }
364                 
365                 in.close();
366                 return prob;
367                 
368         }
369         catch(exception& e) {
370                 errorOut(e, "Pintail", "readFreq");
371                 exit(1);
372         }
373 }
374
375 //***************************************************************************************************************
376
377 vector< vector<float> > Pintail::readQuantiles() {
378         try {
379         
380                 ifstream in;
381                 openInputFile(quanfile, in);
382                 
383                 vector< vector<float> > quan;
384         
385                 int num; float ten, twentyfive, fifty, seventyfive, ninetyfive, ninetynine; 
386                 
387                 while(!in.eof()){
388                         
389                         in >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
390                         
391                         vector <float> temp;
392                         
393                         temp.push_back(ten); 
394                         temp.push_back(twentyfive);
395                         temp.push_back(fifty);
396                         temp.push_back(seventyfive);
397                         temp.push_back(ninetyfive);
398                         temp.push_back(ninetynine);
399                         
400                         quan.push_back(temp);  
401                         
402                         gobble(in);
403                 }
404                 
405                 in.close();
406                 return quan;
407                 
408         }
409         catch(exception& e) {
410                 errorOut(e, "Pintail", "readQuantiles");
411                 exit(1);
412         }
413 }
414 //***************************************************************************************************************
415 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
416 vector<Sequence*> Pintail::findPairs(int start, int end) {
417         try {
418                 
419                 vector<Sequence*> seqsMatches;  
420                 
421                 for(int i = start; i < end; i++){
422                 
423                         float smallest = 10000.0;
424                         Sequence query = *(querySeqs[i]);
425                         Sequence* match;
426                         
427                         for(int j = 0; j < templateSeqs.size(); j++){
428                                 
429                                 Sequence temp = *(templateSeqs[j]);
430                                 
431                                 distcalculator->calcDist(query, temp);
432                                 float dist = distcalculator->getDist();
433                                 
434                                 if (dist < smallest) { 
435                                         match = templateSeqs[j];
436                                         smallest = dist;
437                                 }
438                         }
439                         
440                         //make copy so trimSeqs doesn't overtrim
441                         Sequence* copy = new Sequence(match->getName(), match->getAligned());
442                         
443                         seqsMatches.push_back(copy);
444                 }
445                 
446                 return seqsMatches;
447         
448         }
449         catch(exception& e) {
450                 errorOut(e, "Pintail", "findPairs");
451                 exit(1);
452         }
453 }
454
455 /**************************************************************************************************/
456
457 void Pintail::createProcessesSpots() {
458         try {
459 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
460                 int process = 0;
461                 vector<int> processIDS;
462                 
463                 //loop through and create all the processes you want
464                 while (process != processors) {
465                         int pid = fork();
466                         
467                         if (pid > 0) {
468                                 processIDS.push_back(pid);  
469                                 process++;
470                         }else if (pid == 0){
471                                 
472                                 for (int j = lines[process]->start; j < lines[process]->end; j++) {
473                                 
474                                         //chops off beginning and end of sequences so they both start and end with a base
475                                         map<int, int> trim;
476
477                                         decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
478                                         trimmed[j] = trim;
479                                         
480                                 }
481
482                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
483                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
484                                         it = trimmed[i].begin();
485                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
486                                 }
487                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
488                                 
489                                 //write out data to file so parent can read it
490                                 ofstream out;
491                                 string s = toString(getpid()) + ".temp";
492                                 openOutputFile(s, out);
493                                 
494                                 //output windowsForeachQuery
495                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
496                                         out << windowsForeachQuery[i].size() << '\t';
497                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
498                                                 out << windowsForeachQuery[i][j] << '\t';
499                                         }
500                                         out << endl;
501                                 }
502                         
503                                 //output windowSizes
504                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
505                                         out << windowSizes[i] << '\t';
506                                 }
507                                 out << endl;
508                                 
509                                 //output trimmed values
510                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
511                                         it = trimmed[i].begin();                                        
512                                         out << it->first << '\t' << it->second << endl;
513                                 }
514                                 out.close();
515                                 
516                                 exit(0);
517                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
518                 }
519                 
520                 //force parent to wait until all the processes are done
521                 for (int i=0;i<processors;i++) { 
522                         int temp = processIDS[i];
523                         wait(&temp);
524                 }
525                 
526                 //get data created by processes
527                 for (int i=0;i<processors;i++) { 
528                         ifstream in;
529                         string s = toString(processIDS[i]) + ".temp";
530                         openInputFile(s, in);
531                         
532                         int size = lines[i]->end - lines[i]->start;
533                                         
534                         int count = lines[i]->start;
535                         for (int m = 0; m < size; m++) {
536                                 int num;
537                                 in >> num;
538                         
539                                 vector<int> win;  int w;
540                                 for (int j = 0; j < num; j++) {
541                                         in >> w;
542                                         win.push_back(w);
543                                 }
544                         
545                                 windowsForeachQuery[count] = win;
546                                 count++;
547                                 gobble(in);
548                         }
549                 
550                         gobble(in);
551                         count = lines[i]->start;
552                         for (int m = 0; m < size; m++) {
553                                 int num;
554                                 in >> num;
555                                 
556                                 windowSizes[count] = num;
557                                 count++;
558                         }
559                         
560                         gobble(in);
561                         
562                         count = lines[i]->start;
563                         for (int m = 0; m < size; m++) {
564                                 int front, back;
565                                 in >> front >> back;
566                         
567                                 map<int, int> t;
568                                 
569                                 t[front] = back;
570                                 
571                                 trimmed[count] = t;
572                                 count++;
573                                 
574                                 gobble(in);
575                         }
576
577                         
578                         in.close();
579                         remove(s.c_str());
580                 }
581                         
582         
583 #else
584                 for (int j = 0; j < bestfit.size(); j++) {
585                         //chops off beginning and end of sequences so they both start and end with a base
586                         decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
587                 }
588
589                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
590                                 it = trimmed[i].begin();
591                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
592                                 windowsForeachQuery[i] = win;
593                 }
594
595 #endif          
596         }
597         catch(exception& e) {
598                 errorOut(e, "Pintail", "createProcessesSpots");
599                 exit(1);
600         }
601 }
602 /**************************************************************************************************/
603
604 void Pintail::createProcessesPairs() {
605         try {
606 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
607                 int process = 0;
608                 vector<int> processIDS;
609                 
610                 //loop through and create all the processes you want
611                 while (process != processors) {
612                         int pid = fork();
613                         
614                         if (pid > 0) {
615                                 processIDS.push_back(pid);  
616                                 process++;
617                         }else if (pid == 0){
618                                 
619                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
620                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
621                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
622                                 
623                                 //write out data to file so parent can read it
624                                 ofstream out;
625                                 string s = toString(getpid()) + ".temp";
626                                 openOutputFile(s, out);
627                                 
628                                 //output range and size
629                                 out << bestfit.size() << endl;
630                                 
631                                 //output pairs
632                                 for (int i = 0; i < bestfit.size(); i++) {
633                                         out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
634                                 }
635                                 out.close();
636                                 
637                                 exit(0);
638                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
639                 }
640                 
641                 //force parent to wait until all the processes are done
642                 for (int i=0;i<processors;i++) { 
643                         int temp = processIDS[i];
644                         wait(&temp);
645                 }
646                 
647                 //get data created by processes
648                 for (int i=0;i<processors;i++) { 
649                         ifstream in;
650                         string s = toString(processIDS[i]) + ".temp";
651                         openInputFile(s, in);
652                         
653                         int size;
654                         in >> size;  gobble(in);
655                                 
656                         //get pairs
657                         int count = lines[i]->start;
658                         for (int m = 0; m < size; m++) {
659                                 Sequence* temp = new Sequence(in);
660                                 bestfit[count] = temp;
661                         
662                                 count++;
663                                 gobble(in);
664                         }
665                         
666                         in.close();
667                         remove(s.c_str());
668                 }
669                         
670         
671 #else
672                 bestfit = findPairs(lines[0]->start, lines[0]->end);
673 #endif          
674         }
675         catch(exception& e) {
676                 errorOut(e, "Pintail", "createProcessesPairs");
677                 exit(1);
678         }
679 }
680 /**************************************************************************************************/
681
682 void Pintail::createProcesses() {
683         try {
684 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
685                 int process = 0;
686                 vector<int> processIDS;
687                 
688                 //loop through and create all the processes you want
689                 while (process != processors) {
690                         int pid = fork();
691                         
692                         if (pid > 0) {
693                                 processIDS.push_back(pid);  
694                                 process++;
695                         }else if (pid == 0){
696                                 
697                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
698                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
699                                         
700                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
701                                         obsDistance[i] = obsi;
702                                 
703                                         //calc Qav
704                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
705                                         
706                                         //get alpha
707                                         float alpha = decalc->getCoef(obsDistance[i], q);
708                                         
709                                         //find expected
710                                         vector<float> exp = decalc->calcExpected(q, alpha);
711                                         expectedDistance[i] = exp;
712                                         
713                                         //get de and deviation
714                                         float dei = decalc->calcDE(obsi, exp);
715                                         DE[i] = dei;
716                                         
717                                         it = trimmed[i].begin();
718                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
719                                         deviation[i] = dist;
720                                 }
721                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
722                                 
723                                 //write out data to file so parent can read it
724                                 ofstream out;
725                                 string s = toString(getpid()) + ".temp";
726                                 openOutputFile(s, out);
727                                 
728                                 int size = lines[process]->end - lines[process]->start;
729                                 out << size << endl;
730                                                                 
731                                 //output observed distances
732                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
733                                         out << obsDistance[i].size() << '\t';
734                                         for (int j = 0; j < obsDistance[i].size(); j++) {
735                                                 out << obsDistance[i][j] << '\t';
736                                         }
737                                         out << endl;
738                                 }
739                                 
740                                 
741                                 //output expected distances
742                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
743                                         out << expectedDistance[i].size() << '\t';
744                                         for (int j = 0; j < expectedDistance[i].size(); j++) {
745                                                 out << expectedDistance[i][j] << '\t';
746                                         }
747                                         out << endl;
748                                 }
749
750                         
751                                 //output de values
752                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
753                                         out << DE[i] << '\t';
754                                 }
755                                 out << endl;    
756                                 
757                                 //output de values
758                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
759                                         out << deviation[i] << '\t';
760                                 }
761                                 out << endl;
762                                 
763                                 out.close();
764
765                                 exit(0);
766                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
767                 }
768                 
769                 //force parent to wait until all the processes are done
770                 for (int i=0;i<processors;i++) { 
771                         int temp = processIDS[i];
772                         wait(&temp);
773                 }
774                 
775                 //get data created by processes
776                 for (int i=0;i<processors;i++) { 
777                         ifstream in;
778                         string s = toString(processIDS[i]) + ".temp";
779                         openInputFile(s, in);
780                         
781                         int size;
782                         in >> size;  gobble(in);
783                         
784                         //get observed distances
785                         int count = lines[i]->start;
786                         for (int m = 0; m < size; m++) {
787                                 int num;
788                                 in >> num;
789                         
790                                 vector<float> obs;  float w;
791                                 for (int j = 0; j < num; j++) {
792                                         in >> w;
793                                         obs.push_back(w);
794                                 }
795                         
796                                 obsDistance[count] = obs;
797                                 count++;
798                                 gobble(in);
799                         }
800                         
801                         gobble(in);
802                         
803                         //get expected distances
804                         count = lines[i]->start;
805                         for (int m = 0; m < size; m++) {
806                                 int num;
807                                 in >> num;
808                         
809                                 vector<float> exp;  float w;
810                                 for (int j = 0; j < num; j++) {
811                                         in >> w;
812                                         exp.push_back(w);
813                                 }
814                         
815                                 expectedDistance[count] = exp;
816                                 count++;
817                                 gobble(in);
818                         }
819
820                         gobble(in);
821                         
822                         count = lines[i]->start;
823                         for (int m = 0; m < size; m++) {
824                                 float num;
825                                 in >> num;
826                                 
827                                 DE[count] = num;
828                                 count++;
829                         }
830                         
831                         gobble(in);
832                         
833                         count = lines[i]->start;
834                         for (int m = 0; m < size; m++) {
835                                 float num;
836                                 in >> num;
837                                 
838                                 deviation[count] = num;
839                                 count++;
840                         }
841
842                         in.close();
843                         remove(s.c_str());
844                 }
845
846                                 
847 #else
848                         mothurOut("Calculating observed distance... "); cout.flush();
849                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
850                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
851                                 obsDistance[i] = obsi;
852                         }
853                         mothurOut("Done."); mothurOutEndLine();
854                         
855                         
856                         
857                         mothurOut("Finding variability... "); cout.flush();
858                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
859                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
860                                 Qav[i] = q;
861                         }
862                         mothurOut("Done."); mothurOutEndLine();
863                         
864                         
865                         
866                         mothurOut("Calculating alpha... "); cout.flush();
867                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
868                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
869                                 seqCoef.push_back(alpha);
870                         }
871                         mothurOut("Done."); mothurOutEndLine();
872                 
873                 
874                 
875                         mothurOut("Calculating expected distance... "); cout.flush();
876                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
877                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
878                                 expectedDistance[i] = exp;
879                         }
880                         mothurOut("Done."); mothurOutEndLine();
881                         
882                         
883                         
884                         mothurOut("Finding deviation... "); cout.flush();
885                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
886                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
887                                 DE[i] = de;
888                                 
889                                 it = trimmed[i].begin();
890                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
891                                 deviation[i] = dist;
892                         }
893                         mothurOut("Done."); mothurOutEndLine();
894
895 #endif          
896         }
897         catch(exception& e) {
898                 errorOut(e, "Pintail", "createProcesses");
899                 exit(1);
900         }
901 }
902
903
904 /**************************************************************************************************/
905
906 void Pintail::createProcessesQuan() {
907         try {
908 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
909                 int process = 0;
910                 vector<int> processIDS;
911                                 
912                 //loop through and create all the processes you want
913                 while (process != processors) {
914                         int pid = fork();
915                         
916                         if (pid > 0) {
917                                 processIDS.push_back(pid);  
918                                 process++;
919                         }else if (pid == 0){
920                                 
921                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end, makeCompliant);
922                                 
923                                 //write out data to file so parent can read it
924                                 ofstream out;
925                                 string s = toString(getpid()) + ".temp";
926                                 openOutputFile(s, out);
927                                 
928                                                                 
929                                 //output observed distances
930                                 for (int i = 0; i < quantilesMembers.size(); i++) {
931                                         out << quantilesMembers[i].size() << '\t';
932                                         for (int j = 0; j < quantilesMembers[i].size(); j++) {
933                                                 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
934                                         }
935                                         out << endl;
936                                 }
937                                 
938                                 out.close();
939                                 
940                                 exit(0);
941                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
942                 }
943                 
944                 //force parent to wait until all the processes are done
945                 for (int i=0;i<processors;i++) { 
946                         int temp = processIDS[i];
947                         wait(&temp);
948                 }
949
950                 //get data created by processes
951                 for (int i=0;i<processors;i++) { 
952                         ifstream in;
953                         string s = toString(processIDS[i]) + ".temp";
954                         openInputFile(s, in);
955                         
956                         vector< vector<quanMember> > quan; 
957                         quan.resize(100);
958                         
959                         //get quantiles
960                         for (int m = 0; m < quan.size(); m++) {
961                                 int num;
962                                 in >> num; 
963                                 
964                                 gobble(in);
965
966                                 vector<quanMember> q;  float w; int b, n;
967                                 for (int j = 0; j < num; j++) {
968                                         in >> w >> b >> n;
969         //cout << w << '\t' << b << '\t' n << endl;
970                                         quanMember newMember(w, b, n);
971                                         q.push_back(newMember);
972                                 }
973 //cout << "here" << endl;
974                                 quan[m] = q;
975 //cout << "now here" << endl;
976                                 gobble(in);
977                         }
978                         
979         
980                         //save quan in quantiles
981                         for (int j = 0; j < quan.size(); j++) {
982                                 //put all values of q[i] into quan[i]
983                                 for (int l = 0; l < quan[j].size(); l++) {  quantilesMembers[j].push_back(quan[j][l]);   }
984                                 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
985                         }
986                                         
987                         in.close();
988                         remove(s.c_str());
989                 }
990                 
991 #else
992                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size(), makeCompliant);
993 #endif          
994         }
995         catch(exception& e) {
996                 errorOut(e, "Pintail", "createProcessesQuan");
997                 exit(1);
998         }
999 }
1000
1001
1002 //***************************************************************************************************************
1003
1004