]> git.donarmstrong.com Git - mothur.git/blob - pintail.cpp
added check for blank files in openInputFile, add check for groupfiles that contain...
[mothur.git] / pintail.cpp
1 /*
2  *  pintail.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/9/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "pintail.h"
11 #include "ignoregaps.h"
12 #include "eachgapdist.h"
13
14 //********************************************************************************************************************
15 //sorts lowest to highest
16 inline bool compareQuanMembers(quanMember left, quanMember right){
17         return (left.score < right.score);      
18
19 //***************************************************************************************************************
20
21 Pintail::Pintail(string filename, string temp) {  fastafile = filename;  templateFile = temp;  }
22 //***************************************************************************************************************
23
24 Pintail::~Pintail() {
25         try {
26                 for (int i = 0; i < querySeqs.size(); i++)              {  delete querySeqs[i];         }
27                 for (int i = 0; i < templateSeqs.size(); i++)   {  delete templateSeqs[i];      }
28                 for (int i = 0; i < bestfit.size(); i++)                {  delete bestfit[i];           }  
29         }
30         catch(exception& e) {
31                 errorOut(e, "Pintail", "~Pintail");
32                 exit(1);
33         }
34 }       
35 //***************************************************************************************************************
36 void Pintail::print(ostream& out) {
37         try {
38                 
39                 mothurOutEndLine();
40                 
41                 for (int i = 0; i < querySeqs.size(); i++) {
42                         
43                         int index = ceil(deviation[i]);
44                                                 
45                         //is your DE value higher than the 95%
46                         string chimera;
47                         if (quantiles[index][4] == 0.0) {
48                                 chimera = "Your template does not include sequences that provide quantile values at distance " + toString(index);
49                         }else {
50                                 if (DE[i] > quantiles[index][4])                {       chimera = "Yes";        }
51                                 else                                                                    {       chimera = "No";         }
52                         }
53                         
54                         out << querySeqs[i]->getName() << '\t' << "div: " << deviation[i] << "\tstDev: " << DE[i] << "\tchimera flag: " << chimera << endl;
55                         if (chimera == "Yes") {
56                                 mothurOut(querySeqs[i]->getName() + "\tdiv: " + toString(deviation[i]) + "\tstDev: " + toString(DE[i]) + "\tchimera flag: " + chimera); mothurOutEndLine();
57                         }
58                         out << "Observed\t";
59                         
60                         for (int j = 0; j < obsDistance[i].size(); j++) {  out << obsDistance[i][j] << '\t';  }
61                         out << endl;
62                         
63                         out << "Expected\t";
64                         
65                         for (int m = 0; m < expectedDistance[i].size(); m++) {  out << expectedDistance[i][m] << '\t';  }
66                         out << endl;
67                         
68                 }
69         }
70         catch(exception& e) {
71                 errorOut(e, "Pintail", "print");
72                 exit(1);
73         }
74 }
75
76 //***************************************************************************************************************
77 void Pintail::getChimeras() {
78         try {
79                 
80                 //read in query sequences and subject sequences
81                 mothurOut("Reading sequences and template file... "); cout.flush();
82                 querySeqs = readSeqs(fastafile);
83                 templateSeqs = readSeqs(templateFile);
84                 mothurOut("Done."); mothurOutEndLine();
85                 
86                 int numSeqs = querySeqs.size();
87                 
88                 obsDistance.resize(numSeqs);
89                 expectedDistance.resize(numSeqs);
90                 seqCoef.resize(numSeqs);
91                 DE.resize(numSeqs);
92                 Qav.resize(numSeqs);
93                 bestfit.resize(numSeqs);
94                 deviation.resize(numSeqs);
95                 trimmed.resize(numSeqs);
96                 windowSizes.resize(numSeqs, window);
97                 windowSizesTemplate.resize(templateSeqs.size(), window);
98                 windowsForeachQuery.resize(numSeqs);
99                 h.resize(numSeqs);
100                 quantiles.resize(100);  //one for every percent mismatch
101                 quantilesMembers.resize(100);  //one for every percent mismatch
102                 
103                 //break up file if needed
104                 int linesPerProcess = numSeqs / processors ;
105                 
106                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
107                         //find breakup of sequences for all times we will Parallelize
108                         if (processors == 1) {   lines.push_back(new linePair(0, numSeqs));  }
109                         else {
110                                 //fill line pairs
111                                 for (int i = 0; i < (processors-1); i++) {                      
112                                         lines.push_back(new linePair((i*linesPerProcess), ((i*linesPerProcess) + linesPerProcess)));
113                                 }
114                                 //this is necessary to get remainder of processors / numSeqs so you don't miss any lines at the end
115                                 int i = processors - 1;
116                                 lines.push_back(new linePair((i*linesPerProcess), numSeqs));
117                         }
118                         
119                         //find breakup of templatefile for quantiles
120                         if (processors == 1) {   templateLines.push_back(new linePair(0, templateSeqs.size()));  }
121                         else { 
122                                 for (int i = 0; i < processors; i++) {
123                                         templateLines.push_back(new linePair());
124                                         templateLines[i]->start = int (sqrt(float(i)/float(processors)) * templateSeqs.size());
125                                         templateLines[i]->end = int (sqrt(float(i+1)/float(processors)) * templateSeqs.size());
126                                 }
127                         }
128                 #else
129                         lines.push_back(new linePair(0, numSeqs));
130                         templateLines.push_back(new linePair(0, templateSeqs.size()));
131                 #endif
132                 
133                 distcalculator = new eachGapDist();
134                 decalc = new DeCalculator();
135                 
136                 //if the user does enter a mask then you want to keep all the spots in the alignment
137                 if (seqMask.length() == 0)      {       decalc->setAlignmentLength(querySeqs[0]->getAligned().length());        }
138                 else                                            {       decalc->setAlignmentLength(seqMask.length());                                           }
139                 
140                 decalc->setMask(seqMask);
141                 
142                 //find pairs
143                 if (processors == 1) { 
144                         mothurOut("Finding closest sequence in template to each sequence... "); cout.flush();
145                         bestfit = findPairs(lines[0]->start, lines[0]->end);
146                         mothurOut("Done."); mothurOutEndLine();
147                 }else {         createProcessesPairs();         }
148                 
149 string o = "closestmatch.eachgap.fasta";
150 ofstream out7;
151 openOutputFile(o, out7);
152
153 for (int i = 0; i < bestfit.size(); i++) {
154         out7 << ">" << querySeqs[i]->getName() << "-"<< bestfit[i]->getName() << endl;
155         out7 << bestfit[i]->getAligned() << endl;
156 }               
157 out7.close();   
158                 //find P
159                 mothurOut("Getting conservation... "); cout.flush();
160                 if (consfile == "") { 
161                         mothurOut("Calculating probability of conservation for your template sequences.  This can take a while...  I will output the frequency of the highest base in each position to a .freq file so that you can input them using the conservation parameter next time you run this command.  Providing the .freq file will improve speed.    "); cout.flush();
162                         probabilityProfile = decalc->calcFreq(templateSeqs, templateFile); 
163                         mothurOut("Done."); mothurOutEndLine();
164                 }else                           {   probabilityProfile = readFreq();                      }
165
166                 //make P into Q
167                 for (int i = 0; i < probabilityProfile.size(); i++)  {  probabilityProfile[i] = 1 - probabilityProfile[i];  }  //cout << i << '\t' << probabilityProfile[i] << endl;
168                 mothurOut("Done."); mothurOutEndLine();
169                 
170                 //mask sequences if the user wants to 
171                 if (seqMask != "") {
172                         //mask querys
173                         for (int i = 0; i < querySeqs.size(); i++) {
174                                 decalc->runMask(querySeqs[i]);
175                         }
176                 
177                         //mask templates
178                         for (int i = 0; i < templateSeqs.size(); i++) {
179                                 decalc->runMask(templateSeqs[i]);
180                         }
181                         
182                         for (int i = 0; i < bestfit.size(); i++) { 
183                                 decalc->runMask(bestfit[i]);
184                         }
185
186                 }
187                 
188                 if (filter) {
189                         vector<Sequence*> temp = templateSeqs;
190                         for (int i = 0; i < querySeqs.size(); i++) { temp.push_back(querySeqs[i]);  }
191                         
192                         createFilter(temp);
193                         
194                         runFilter(querySeqs);
195                         runFilter(templateSeqs);
196                         runFilter(bestfit);
197                 }
198                                                                                 
199                                                                                                                                                 
200                 if (processors == 1) { 
201         
202                         for (int j = 0; j < bestfit.size(); j++) { 
203                                 decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
204                         }
205                         
206                         mothurOut("Finding window breaks... "); cout.flush();
207                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
208                                 it = trimmed[i].begin();
209                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
210                                 windowsForeachQuery[i] = win;
211                         }
212                         mothurOut("Done."); mothurOutEndLine();
213                 
214                 }else {         createProcessesSpots();         }
215                 
216                 if (processors == 1) { 
217                                                 
218                         mothurOut("Calculating observed distance... "); cout.flush();
219                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
220                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
221         
222                                 obsDistance[i] = obsi;
223                         }
224                         mothurOut("Done."); mothurOutEndLine();
225                         
226                         
227                         mothurOut("Finding variability... "); cout.flush();
228                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
229                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
230
231                                 Qav[i] = q;
232                         }
233                         mothurOut("Done."); mothurOutEndLine();
234                         
235                         
236                         mothurOut("Calculating alpha... "); cout.flush();
237                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
238                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
239                                 seqCoef[i] = alpha;
240                         }
241                         mothurOut("Done."); mothurOutEndLine();
242                 
243                 
244                         mothurOut("Calculating expected distance... "); cout.flush();
245                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
246                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
247                                 expectedDistance[i] = exp;
248                         }
249                         mothurOut("Done."); mothurOutEndLine();
250                         
251                         
252                         mothurOut("Finding deviation... "); cout.flush();
253                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
254                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
255                                 DE[i] = de;
256                         
257                                 it = trimmed[i].begin();
258                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
259                                 deviation[i] = dist;
260                         }
261                         mothurOut("Done."); mothurOutEndLine();
262                         
263                 } 
264                 else {          createProcesses();              }
265                 
266                 //quantiles are used to determine whether the de values found indicate a chimera
267                 //if you have to calculate them, its time intensive because you are finding the de and deviation values for each 
268                 //combination of sequences in the template
269                 if (quanfile != "") {  quantiles = readQuantiles();  }
270                 else {
271                         
272                         mothurOut("Calculating quantiles for your template.  This can take a while...  I will output the quantiles to a .quan file that you can input them using the quantiles parameter next time you run this command.  Providing the .quan file will dramatically improve speed.    "); cout.flush();
273                         if (processors == 1) { 
274                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
275                         }else {         createProcessesQuan();          }
276                 
277                         
278                         ofstream out4, out5;
279                         string noOutliers, outliers;
280                         
281                         if ((!filter) && (seqMask == "")) {
282                                 noOutliers = getRootName(templateFile) + "pintail.quan";
283                         }else if ((filter) && (seqMask == "")) { 
284                                 noOutliers = getRootName(templateFile) + "pintail.filtered.quan";
285                         }else if ((!filter) && (seqMask != "")) { 
286                                 noOutliers = getRootName(templateFile) + "pintail.masked.quan";
287                         }else if ((filter) && (seqMask != "")) { 
288                                 noOutliers = getRootName(templateFile) + "pintail.filtered.masked.quan";
289                         }
290
291                         //outliers = getRootName(templateFile) + "pintail.quanYESOUTLIERS";
292                         
293                         /*openOutputFile(outliers, out4);
294                         
295                         //adjust quantiles
296                         for (int i = 0; i < quantilesMembers.size(); i++) {
297                                 vector<float> temp;
298                                 
299                                 if (quantilesMembers[i].size() == 0) {
300                                         //in case this is not a distance found in your template files
301                                         for (int g = 0; g < 6; g++) {
302                                                 temp.push_back(0.0);
303                                         }
304                                 }else{
305                                         
306                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
307                                         
308                                         //save 10%
309                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
310                                         //save 25%
311                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
312                                         //save 50%
313                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
314                                         //save 75%
315                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
316                                         //save 95%
317                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
318                                         //save 99%
319                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
320                                         
321                                 }
322                                 
323                                 //output quan value
324                                 out4 << i+1 << '\t';                            
325                                 for (int u = 0; u < temp.size(); u++) {   out4 << temp[u] << '\t'; }
326                                 out4 << endl;
327                                 
328                                 quantiles[i] = temp;
329                                 
330                         }
331                         
332                         out4.close();*/
333                         
334                         decalc->removeObviousOutliers(quantilesMembers, templateSeqs.size());
335                         
336                         openOutputFile(noOutliers, out5);
337                         
338                         //adjust quantiles
339                         for (int i = 0; i < quantilesMembers.size(); i++) {
340                                 vector<float> temp;
341                                 
342                                 if (quantilesMembers[i].size() == 0) {
343                                         //in case this is not a distance found in your template files
344                                         for (int g = 0; g < 6; g++) {
345                                                 temp.push_back(0.0);
346                                         }
347                                 }else{
348                                         
349                                         sort(quantilesMembers[i].begin(), quantilesMembers[i].end(), compareQuanMembers);
350                                         
351                                         //save 10%
352                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.10)].score);
353                                         //save 25%
354                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.25)].score);
355                                         //save 50%
356                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.5)].score);
357                                         //save 75%
358                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.75)].score);
359                                         //save 95%
360                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.95)].score);
361                                         //save 99%
362                                         temp.push_back(quantilesMembers[i][int(quantilesMembers[i].size() * 0.99)].score);
363                                         
364                                 }
365                                 
366                                 //output quan value
367                                 out5 << i+1 << '\t';                            
368                                 for (int u = 0; u < temp.size(); u++) {   out5 << temp[u] << '\t'; }
369                                 out5 << endl;
370                                 
371                                 quantiles[i] = temp;
372                                 
373                         }
374
375                         mothurOut("Done."); mothurOutEndLine();
376                         
377                 }
378                         
379                 //free memory
380                 for (int i = 0; i < lines.size(); i++)                                  {       delete lines[i];                                }
381                 for (int i = 0; i < templateLines.size(); i++)                  {       delete templateLines[i];                }
382                         
383                 delete distcalculator;
384                 delete decalc;
385         }
386         catch(exception& e) {
387                 errorOut(e, "Pintail", "getChimeras");
388                 exit(1);
389         }
390 }
391
392 //***************************************************************************************************************
393
394 vector<float> Pintail::readFreq() {
395         try {
396         
397                 ifstream in;
398                 openInputFile(consfile, in);
399                 
400                 vector<float> prob;
401                 set<int> h = decalc->getPos();  //positions of bases in masking sequence
402                 
403                 //read in probabilities and store in vector
404                 int pos; float num; 
405                 
406                 while(!in.eof()){
407                         
408                         in >> pos >> num;
409                         
410                         if (h.count(pos) > 0) {
411                                 float Pi;
412                                 Pi =  (num - 0.25) / 0.75; 
413                         
414                                 //cannot have probability less than 0.
415                                 if (Pi < 0) { Pi = 0.0; }
416
417                                 //do you want this spot
418                                 prob.push_back(Pi);  
419                         }
420                         
421                         gobble(in);
422                 }
423                 
424                 in.close();
425                 return prob;
426                 
427         }
428         catch(exception& e) {
429                 errorOut(e, "Pintail", "readFreq");
430                 exit(1);
431         }
432 }
433
434 //***************************************************************************************************************
435 //calculate the distances from each query sequence to all sequences in the template to find the closest sequence
436 vector<Sequence*> Pintail::findPairs(int start, int end) {
437         try {
438                 
439                 vector<Sequence*> seqsMatches;  
440                 
441                 for(int i = start; i < end; i++){
442                 
443                         float smallest = 10000.0;
444                         Sequence query = *(querySeqs[i]);
445                         Sequence* match;
446                         
447                         for(int j = 0; j < templateSeqs.size(); j++){
448                                 
449                                 Sequence temp = *(templateSeqs[j]);
450                                 
451                                 distcalculator->calcDist(query, temp);
452                                 float dist = distcalculator->getDist();
453                                 
454                                 if (dist < smallest) { 
455                                         match = templateSeqs[j];
456                                         smallest = dist;
457                                 }
458                         }
459                         
460                         //make copy so trimSeqs doesn't overtrim
461                         Sequence* copy = new Sequence(match->getName(), match->getAligned());
462                         
463                         seqsMatches.push_back(copy);
464                 }
465                 
466                 return seqsMatches;
467         
468         }
469         catch(exception& e) {
470                 errorOut(e, "Pintail", "findPairs");
471                 exit(1);
472         }
473 }
474
475 /**************************************************************************************************/
476
477 void Pintail::createProcessesSpots() {
478         try {
479 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
480                 int process = 0;
481                 vector<int> processIDS;
482                 
483                 //loop through and create all the processes you want
484                 while (process != processors) {
485                         int pid = fork();
486                         
487                         if (pid > 0) {
488                                 processIDS.push_back(pid);  
489                                 process++;
490                         }else if (pid == 0){
491                                 
492                                 for (int j = lines[process]->start; j < lines[process]->end; j++) {
493                                 
494                                         //chops off beginning and end of sequences so they both start and end with a base
495                                         map<int, int> trim;
496
497                                         decalc->trimSeqs(querySeqs[j], bestfit[j], trim); 
498                                         trimmed[j] = trim;
499                                         
500                                 }
501
502                                 mothurOut("Finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
503                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
504                                         it = trimmed[i].begin();
505                                         windowsForeachQuery[i] = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
506                                 }
507                                 mothurOut("Done finding window breaks for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
508                                 
509                                 //write out data to file so parent can read it
510                                 ofstream out;
511                                 string s = toString(getpid()) + ".temp";
512                                 openOutputFile(s, out);
513                                 
514                                 //output windowsForeachQuery
515                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
516                                         out << windowsForeachQuery[i].size() << '\t';
517                                         for (int j = 0; j < windowsForeachQuery[i].size(); j++) {
518                                                 out << windowsForeachQuery[i][j] << '\t';
519                                         }
520                                         out << endl;
521                                 }
522                         
523                                 //output windowSizes
524                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
525                                         out << windowSizes[i] << '\t';
526                                 }
527                                 out << endl;
528                                 
529                                 //output trimmed values
530                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
531                                         it = trimmed[i].begin();                                        
532                                         out << it->first << '\t' << it->second << endl;
533                                 }
534                                 out.close();
535                                 
536                                 exit(0);
537                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
538                 }
539                 
540                 //force parent to wait until all the processes are done
541                 for (int i=0;i<processors;i++) { 
542                         int temp = processIDS[i];
543                         wait(&temp);
544                 }
545                 
546                 //get data created by processes
547                 for (int i=0;i<processors;i++) { 
548                         ifstream in;
549                         string s = toString(processIDS[i]) + ".temp";
550                         openInputFile(s, in);
551                         
552                         int size = lines[i]->end - lines[i]->start;
553                                         
554                         int count = lines[i]->start;
555                         for (int m = 0; m < size; m++) {
556                                 int num;
557                                 in >> num;
558                         
559                                 vector<int> win;  int w;
560                                 for (int j = 0; j < num; j++) {
561                                         in >> w;
562                                         win.push_back(w);
563                                 }
564                         
565                                 windowsForeachQuery[count] = win;
566                                 count++;
567                                 gobble(in);
568                         }
569                 
570                         gobble(in);
571                         count = lines[i]->start;
572                         for (int m = 0; m < size; m++) {
573                                 int num;
574                                 in >> num;
575                                 
576                                 windowSizes[count] = num;
577                                 count++;
578                         }
579                         
580                         gobble(in);
581                         
582                         count = lines[i]->start;
583                         for (int m = 0; m < size; m++) {
584                                 int front, back;
585                                 in >> front >> back;
586                         
587                                 map<int, int> t;
588                                 
589                                 t[front] = back;
590                                 
591                                 trimmed[count] = t;
592                                 count++;
593                                 
594                                 gobble(in);
595                         }
596
597                         
598                         in.close();
599                         remove(s.c_str());
600                 }
601                         
602         
603 #else
604                 for (int j = 0; j < bestfit.size(); j++) {
605                         //chops off beginning and end of sequences so they both start and end with a base
606                         decalc->trimSeqs(querySeqs[j], bestfit[j], trimmed[j]);  
607                 }
608
609                 for (int i = lines[0]->start; i < lines[0]->end; i++) {
610                                 it = trimmed[i].begin();
611                                 vector<int> win = decalc->findWindows(querySeqs[i], it->first, it->second, windowSizes[i], increment);
612                                 windowsForeachQuery[i] = win;
613                 }
614
615 #endif          
616         }
617         catch(exception& e) {
618                 errorOut(e, "Pintail", "createProcessesSpots");
619                 exit(1);
620         }
621 }
622 /**************************************************************************************************/
623
624 void Pintail::createProcessesPairs() {
625         try {
626 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
627                 int process = 0;
628                 vector<int> processIDS;
629                 
630                 //loop through and create all the processes you want
631                 while (process != processors) {
632                         int pid = fork();
633                         
634                         if (pid > 0) {
635                                 processIDS.push_back(pid);  
636                                 process++;
637                         }else if (pid == 0){
638                                 
639                                 mothurOut("Finding pairs for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
640                                 bestfit = findPairs(lines[process]->start, lines[process]->end);
641                                 mothurOut("Done finding pairs for sequences " +  toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
642                                 
643                                 //write out data to file so parent can read it
644                                 ofstream out;
645                                 string s = toString(getpid()) + ".temp";
646                                 openOutputFile(s, out);
647                                 
648                                 //output range and size
649                                 out << bestfit.size() << endl;
650                                 
651                                 //output pairs
652                                 for (int i = 0; i < bestfit.size(); i++) {
653                                         out << ">" << bestfit[i]->getName() << endl << bestfit[i]->getAligned() << endl;
654                                 }
655                                 out.close();
656                                 
657                                 exit(0);
658                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
659                 }
660                 
661                 //force parent to wait until all the processes are done
662                 for (int i=0;i<processors;i++) { 
663                         int temp = processIDS[i];
664                         wait(&temp);
665                 }
666                 
667                 //get data created by processes
668                 for (int i=0;i<processors;i++) { 
669                         ifstream in;
670                         string s = toString(processIDS[i]) + ".temp";
671                         openInputFile(s, in);
672                         
673                         int size;
674                         in >> size;  gobble(in);
675                                 
676                         //get pairs
677                         int count = lines[i]->start;
678                         for (int m = 0; m < size; m++) {
679                                 Sequence* temp = new Sequence(in);
680                                 bestfit[count] = temp;
681                         
682                                 count++;
683                                 gobble(in);
684                         }
685                         
686                         in.close();
687                         remove(s.c_str());
688                 }
689                         
690         
691 #else
692                 bestfit = findPairs(lines[0]->start, lines[0]->end);
693 #endif          
694         }
695         catch(exception& e) {
696                 errorOut(e, "Pintail", "createProcessesPairs");
697                 exit(1);
698         }
699 }
700 /**************************************************************************************************/
701
702 void Pintail::createProcesses() {
703         try {
704 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
705                 int process = 0;
706                 vector<int> processIDS;
707                 
708                 //loop through and create all the processes you want
709                 while (process != processors) {
710                         int pid = fork();
711                         
712                         if (pid > 0) {
713                                 processIDS.push_back(pid);  
714                                 process++;
715                         }else if (pid == 0){
716                                 
717                                 mothurOut("Calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
718                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
719                                         
720                                         vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
721                                         obsDistance[i] = obsi;
722                                 
723                                         //calc Qav
724                                         vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
725                                         
726                                         //get alpha
727                                         float alpha = decalc->getCoef(obsDistance[i], q);
728                                         
729                                         //find expected
730                                         vector<float> exp = decalc->calcExpected(q, alpha);
731                                         expectedDistance[i] = exp;
732                                         
733                                         //get de and deviation
734                                         float dei = decalc->calcDE(obsi, exp);
735                                         DE[i] = dei;
736                                         
737                                         it = trimmed[i].begin();
738                                         float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
739                                         deviation[i] = dist;
740                                 }
741                                 mothurOut("Done calculating observed, expected and de values for sequences " + toString(lines[process]->start) + " to " + toString(lines[process]->end)); mothurOutEndLine();
742                                 
743                                 //write out data to file so parent can read it
744                                 ofstream out;
745                                 string s = toString(getpid()) + ".temp";
746                                 openOutputFile(s, out);
747                                 
748                                 int size = lines[process]->end - lines[process]->start;
749                                 out << size << endl;
750                                                                 
751                                 //output observed distances
752                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
753                                         out << obsDistance[i].size() << '\t';
754                                         for (int j = 0; j < obsDistance[i].size(); j++) {
755                                                 out << obsDistance[i][j] << '\t';
756                                         }
757                                         out << endl;
758                                 }
759                                 
760                                 
761                                 //output expected distances
762                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
763                                         out << expectedDistance[i].size() << '\t';
764                                         for (int j = 0; j < expectedDistance[i].size(); j++) {
765                                                 out << expectedDistance[i][j] << '\t';
766                                         }
767                                         out << endl;
768                                 }
769
770                         
771                                 //output de values
772                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
773                                         out << DE[i] << '\t';
774                                 }
775                                 out << endl;    
776                                 
777                                 //output de values
778                                 for (int i = lines[process]->start; i < lines[process]->end; i++) {
779                                         out << deviation[i] << '\t';
780                                 }
781                                 out << endl;
782                                 
783                                 out.close();
784
785                                 exit(0);
786                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
787                 }
788                 
789                 //force parent to wait until all the processes are done
790                 for (int i=0;i<processors;i++) { 
791                         int temp = processIDS[i];
792                         wait(&temp);
793                 }
794                 
795                 //get data created by processes
796                 for (int i=0;i<processors;i++) { 
797                         ifstream in;
798                         string s = toString(processIDS[i]) + ".temp";
799                         openInputFile(s, in);
800                         
801                         int size;
802                         in >> size;  gobble(in);
803                         
804                         //get observed distances
805                         int count = lines[i]->start;
806                         for (int m = 0; m < size; m++) {
807                                 int num;
808                                 in >> num;
809                         
810                                 vector<float> obs;  float w;
811                                 for (int j = 0; j < num; j++) {
812                                         in >> w;
813                                         obs.push_back(w);
814                                 }
815                         
816                                 obsDistance[count] = obs;
817                                 count++;
818                                 gobble(in);
819                         }
820                         
821                         gobble(in);
822                         
823                         //get expected distances
824                         count = lines[i]->start;
825                         for (int m = 0; m < size; m++) {
826                                 int num;
827                                 in >> num;
828                         
829                                 vector<float> exp;  float w;
830                                 for (int j = 0; j < num; j++) {
831                                         in >> w;
832                                         exp.push_back(w);
833                                 }
834                         
835                                 expectedDistance[count] = exp;
836                                 count++;
837                                 gobble(in);
838                         }
839
840                         gobble(in);
841                         
842                         count = lines[i]->start;
843                         for (int m = 0; m < size; m++) {
844                                 float num;
845                                 in >> num;
846                                 
847                                 DE[count] = num;
848                                 count++;
849                         }
850                         
851                         gobble(in);
852                         
853                         count = lines[i]->start;
854                         for (int m = 0; m < size; m++) {
855                                 float num;
856                                 in >> num;
857                                 
858                                 deviation[count] = num;
859                                 count++;
860                         }
861
862                         in.close();
863                         remove(s.c_str());
864                 }
865
866                                 
867 #else
868                         mothurOut("Calculating observed distance... "); cout.flush();
869                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
870                                 vector<float> obsi = decalc->calcObserved(querySeqs[i], bestfit[i], windowsForeachQuery[i], windowSizes[i]);
871                                 obsDistance[i] = obsi;
872                         }
873                         mothurOut("Done."); mothurOutEndLine();
874                         
875                         
876                         
877                         mothurOut("Finding variability... "); cout.flush();
878                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
879                                 vector<float> q = decalc->findQav(windowsForeachQuery[i], windowSizes[i], probabilityProfile);
880                                 Qav[i] = q;
881                         }
882                         mothurOut("Done."); mothurOutEndLine();
883                         
884                         
885                         
886                         mothurOut("Calculating alpha... "); cout.flush();
887                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
888                                 float alpha = decalc->getCoef(obsDistance[i], Qav[i]);
889                                 seqCoef.push_back(alpha);
890                         }
891                         mothurOut("Done."); mothurOutEndLine();
892                 
893                 
894                 
895                         mothurOut("Calculating expected distance... "); cout.flush();
896                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
897                                 vector<float> exp = decalc->calcExpected(Qav[i], seqCoef[i]);
898                                 expectedDistance[i] = exp;
899                         }
900                         mothurOut("Done."); mothurOutEndLine();
901                         
902                         
903                         
904                         mothurOut("Finding deviation... "); cout.flush();
905                         for (int i = lines[0]->start; i < lines[0]->end; i++) {
906                                 float de = decalc->calcDE(obsDistance[i], expectedDistance[i]);
907                                 DE[i] = de;
908                                 
909                                 it = trimmed[i].begin();
910                                 float dist = decalc->calcDist(querySeqs[i], bestfit[i], it->first, it->second); 
911                                 deviation[i] = dist;
912                         }
913                         mothurOut("Done."); mothurOutEndLine();
914
915 #endif          
916         }
917         catch(exception& e) {
918                 errorOut(e, "Pintail", "createProcesses");
919                 exit(1);
920         }
921 }
922
923
924 /**************************************************************************************************/
925
926 void Pintail::createProcessesQuan() {
927         try {
928 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
929                 int process = 0;
930                 vector<int> processIDS;
931                                 
932                 //loop through and create all the processes you want
933                 while (process != processors) {
934                         int pid = fork();
935                         
936                         if (pid > 0) {
937                                 processIDS.push_back(pid);  
938                                 process++;
939                         }else if (pid == 0){
940                                 
941                                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, templateLines[process]->start, templateLines[process]->end);
942                                 
943                                 //write out data to file so parent can read it
944                                 ofstream out;
945                                 string s = toString(getpid()) + ".temp";
946                                 openOutputFile(s, out);
947                                 
948                                                                 
949                                 //output observed distances
950                                 for (int i = 0; i < quantilesMembers.size(); i++) {
951                                         out << quantilesMembers[i].size() << '\t';
952                                         for (int j = 0; j < quantilesMembers[i].size(); j++) {
953                                                 out << quantilesMembers[i][j].score << '\t' << quantilesMembers[i][j].member1 << '\t' << quantilesMembers[i][j].member2 << '\t';
954                                         }
955                                         out << endl;
956                                 }
957                                 
958                                 out.close();
959                                 
960                                 exit(0);
961                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
962                 }
963                 
964                 //force parent to wait until all the processes are done
965                 for (int i=0;i<processors;i++) { 
966                         int temp = processIDS[i];
967                         wait(&temp);
968                 }
969
970                 //get data created by processes
971                 for (int i=0;i<processors;i++) { 
972                         ifstream in;
973                         string s = toString(processIDS[i]) + ".temp";
974                         openInputFile(s, in);
975                         
976                         vector< vector<quanMember> > quan; 
977                         quan.resize(100);
978                         
979                         //get quantiles
980                         for (int m = 0; m < quan.size(); m++) {
981                                 int num;
982                                 in >> num; 
983                                 
984                                 gobble(in);
985
986                                 vector<quanMember> q;  float w; int b, n;
987                                 for (int j = 0; j < num; j++) {
988                                         in >> w >> b >> n;
989         //cout << w << '\t' << b << '\t' n << endl;
990                                         quanMember newMember(w, b, n);
991                                         q.push_back(newMember);
992                                 }
993 //cout << "here" << endl;
994                                 quan[m] = q;
995 //cout << "now here" << endl;
996                                 gobble(in);
997                         }
998                         
999         
1000                         //save quan in quantiles
1001                         for (int j = 0; j < quan.size(); j++) {
1002                                 //put all values of q[i] into quan[i]
1003                                 for (int l = 0; l < quan[j].size(); l++) {  quantilesMembers[j].push_back(quan[j][l]);   }
1004                                 //quantilesMembers[j].insert(quantilesMembers[j].begin(), quan[j].begin(), quan[j].end());
1005                         }
1006                                         
1007                         in.close();
1008                         remove(s.c_str());
1009                 }
1010                 
1011 #else
1012                 quantilesMembers = decalc->getQuantiles(templateSeqs, windowSizesTemplate, window, probabilityProfile, increment, 0, templateSeqs.size());
1013 #endif          
1014         }
1015         catch(exception& e) {
1016                 errorOut(e, "Pintail", "createProcessesQuan");
1017                 exit(1);
1018         }
1019 }
1020
1021
1022 //***************************************************************************************************************
1023
1024