]> git.donarmstrong.com Git - genome.git/blob - genome.cpp
add missing parenthesis
[genome.git] / genome.cpp
1 ////////////////////////////////////////////////////////////////////// \r
2 // genome.cpp\r
3 //////////////////////////////////////////////////////////////////////////////\r
4 //              COPYRIGHT NOTICE FOR GENOME CODE\r
5 //\r
6 // Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
7 // Version 0.2\r
8 // All rights reserved.\r
9 //\r
10 //   Redistribution and use in source and binary forms, with or without\r
11 //   modification, are permitted provided that the following conditions\r
12 //   are met:\r
13 //\r
14 //     1. Redistributions of source code must retain the above copyright\r
15 //        notice, this list of conditions and the following disclaimer.\r
16 //\r
17 //     2. Redistributions in binary form must reproduce the above copyright\r
18 //        notice, this list of conditions and the following disclaimer in the\r
19 //        documentation and/or other materials provided with the distribution.\r
20 //\r
21 //     3. The names of its contributors may not be used to endorse or promote\r
22 //        products derived from this software without specific prior written\r
23 //        permission.\r
24 //\r
25 //   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
26 //   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
27 //   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
28 //   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
29 //   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
30 //   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
31 //   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
32 //   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
33 //   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
34 //   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
35 //   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
36 //\r
37 ///////////////////////////////////////////////////////////////////////////////\r
38 \r
39 \r
40 #define _CRT_SECURE_NO_DEPRECATE \r
41 \r
42 #include "Random.h"\r
43 #include "Error.h"\r
44 #include "stochastic.h" \r
45 #include <iostream>\r
46 #include <fstream>\r
47 #include <math.h>\r
48 #include <vector> \r
49 #include <time.h>\r
50 #include <algorithm>\r
51 #include <map>\r
52 #include <sstream>\r
53 #include <stdio.h>\r
54 #include <string.h>\r
55 \r
56 using namespace std;\r
57 \r
58 //static bool debug = false;\r
59 static bool fixedPOP = true;\r
60 static bool noChange = true;\r
61 \r
62 \r
63 static int N = 10000, n = 100, L = 1, poolsize = 1024 * 1024 * 64, length=10000, numChr=1, SNP=-1; // length = number of base per fragment\r
64 static int maxN;\r
65 static unsigned int currentProfile;\r
66 static int nPOP=1;\r
67 static vector<int> nSample(nPOP,3);\r
68 static double recombination = 1e-8, migration = 0.0, mutation=1e-8;\r
69 \r
70 static int * genepool, * genetimes, * fragments, * MRCA, * MRCAtimes;\r
71 static int *** children, *** parents, * cblocks, * pblocks;\r
72 static int * parent_First, * parent_Last, * child_First, * child_Last;\r
73 static bool * child_flags, * parent_flags;\r
74 static int nextFree = 0, activeSegments = 0, pblockcounter = 0;\r
75 static int numBLOCKs;\r
76 static int BUFFER=1500;\r
77 static int INCREMENT=1000;\r
78 \r
79 #define BLOCKSIZE   128\r
80 #define SWAP(tmp,a,b)  tmp=a; a=b; b=tmp;\r
81 \r
82 static vector< vector< vector<bool> > > chromosome;\r
83 static int * fragmentLength;\r
84 static int * branchFragment;\r
85 static int * geneIndex;\r
86 static int * branchMutation;\r
87 \r
88 static float * recombVector=NULL;\r
89 \r
90 struct popProfilesStruct{\r
91         int generation;\r
92         vector<int> popsize;\r
93         vector<int> popStart;\r
94         vector< vector<float> > selectProb;\r
95         vector< vector<int> > popChange;\r
96 };\r
97 \r
98 static vector<struct popProfilesStruct> popProfile;\r
99 \r
100 void print(){\r
101         cout<<"\n*****************\nParents:\t";\r
102         for(int i=0;i<N;i++){\r
103                 if(parent_flags[i]){\r
104                         for(int j=0;j<L;j++) cout<<"["<<((parents[i][j / BLOCKSIZE]==NULL)?999:(parents[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
105                         cout<<"\t";     \r
106                 }\r
107         }       \r
108         cout<<endl;\r
109         \r
110         cout<<"Children:\t";\r
111         for(int i=0;i<N;i++){\r
112                 if(child_flags[i]){\r
113                         for(int j=0;j<L;j++) cout<<"["<<((children[i][j / BLOCKSIZE]==NULL)?999:(children[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
114                         cout<<"\t";     \r
115                 }\r
116         }       \r
117         cout<<endl;\r
118         \r
119         \r
120         \r
121 //      cout<<"parent_flag:\t";\r
122 //      for(int i=0;i<N;i++){\r
123 //              cout<<parent_flags[i]<<"\t";\r
124 //      }       \r
125 //      cout<<endl;\r
126 //      \r
127 //      cout<<"children_flag:\t";\r
128 //      for(int i=0;i<N;i++){\r
129 //              cout<<child_flags[i]<<"\t";\r
130 //      }       \r
131 //      cout<<endl;\r
132         \r
133         cout<<"genepool:\t";\r
134         for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
135         cout<<endl;\r
136         \r
137         cout<<"genetimes:\t";\r
138         for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
139         cout<<endl;\r
140         \r
141         cout<<"MCRA:\t";\r
142         for(int i=0;i<L;i++)cout<<MRCA[i]<<" ";\r
143         cout<<endl;\r
144         \r
145         cout<<"MCRAtime:\t";\r
146         for(int i=0;i<L;i++)cout<<MRCAtimes[i]<<" ";\r
147         cout<<endl;\r
148         \r
149         cout<<"fragments:\t";\r
150         for(int i=0;i<L;i++)cout<<fragments[i]<<" ";\r
151         cout<<endl;\r
152         \r
153         cout<<"NextFree="<<nextFree<<endl;\r
154 }\r
155 \r
156 int *** AllocateIntPtrMatrix(int rows, int columns){\r
157    int *** result = new int ** [rows];\r
158 \r
159    for (int i = 0; i < rows; i++)\r
160       result[i] = new int * [columns];\r
161   \r
162    return result;\r
163 }\r
164    \r
165 void FreeIntPtrMatrix(int *** & matrix, int rows){\r
166         \r
167    for (int i = 0; i < rows; i++)\r
168       delete [] matrix[i];\r
169 \r
170    delete [] matrix;\r
171 \r
172    matrix = NULL;\r
173 }   \r
174    \r
175 void NewGeneration(){\r
176         \r
177    int *** ptrmatrix, * vector;\r
178    bool * boolvector;\r
179    \r
180 //    for(int i=0;i<maxN;i++){\r
181 //      if(parent_flags[i])printf("numBlock=%d parent=%d first=%d last=%d\n",numBLOCKs,i,parent_First[i],parent_Last[i]);   \r
182 //    }\r
183    \r
184    SWAP(ptrmatrix, children, parents);\r
185    SWAP(vector, child_First, parent_First);\r
186    SWAP(vector, child_Last, parent_Last);\r
187    SWAP(boolvector, child_flags, parent_flags);\r
188    SWAP(vector, cblocks, pblocks);\r
189    \r
190    if(!noChange){\r
191            currentProfile++;\r
192            nPOP = (int)(popProfile[currentProfile].popsize.size());\r
193            N = popProfile[currentProfile].popStart.back()+popProfile[currentProfile].popsize.back();\r
194            noChange = true;\r
195    }\r
196 \r
197    for (int i = 0; i < maxN; i++)\r
198       parent_flags[i] = 0;\r
199       \r
200 //    for (int i = 0; i < maxN; i++)\r
201 //        for (int j = 0; j < numBLOCKs; j++)\r
202 //            parents[i][j] = NULL;\r
203       \r
204    pblockcounter = 0;\r
205 \r
206 }\r
207 \r
208    \r
209 void AllocateMemory(vector< vector<bool> > &return_chromosome){\r
210    \r
211    \r
212 //    if(SNP<=0){\r
213 //              cout<<"Reserve size for return_chromosome="<<max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP))<<endl;\r
214 //    }\r
215 //    else{\r
216 //              cout<<"Reserve size for return_chromosome="<<SNP*numChr<<endl;\r
217 //    }\r
218 //    cout<<"Reserve size for working chromosome="<<max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP))<<endl;\r
219    \r
220  \r
221    return_chromosome.clear();\r
222    return_chromosome.resize(n);\r
223 //    for(int i=0;i<n;i++){\r
224 //         if(SNP<=0){\r
225 //                      //return_chromosome[i].reserve(max(1,40*(int)(mutation*(double)numChr*(double)L*(double)length*(double)maxN/(double)nPOP)));\r
226 //         }\r
227 //         else{\r
228 //                      //return_chromosome[i].reserve(SNP*numChr);\r
229 //         }\r
230 //      }\r
231    \r
232    chromosome.resize(n);\r
233    for(int i=0;i<n;i++){\r
234                 chromosome[i].resize(L);\r
235 //              for(int j=0;j<L;j++){\r
236 //                 if(SNP<=0){\r
237 //                         //chromosome[i][j].reserve(max(1,40*(int)(mutation*(double)length*(double)maxN/(double)nPOP)));\r
238 //                 }\r
239 //                 else{\r
240 //                         //chromosome[i][j].reserve(SNP/L+1);\r
241 //                 }\r
242 //              }\r
243    }\r
244   \r
245    genepool = new int [poolsize];\r
246 \r
247    numBLOCKs= (int)ceil((double)L / (double)BLOCKSIZE);\r
248  \r
249    pblockcounter = 0;\r
250 \r
251 }\r
252 \r
253 \r
254 void AllocateMutationMemory(){\r
255         \r
256         fragmentLength = new int [L];\r
257         \r
258         branchFragment = new int [poolsize];\r
259         \r
260         geneIndex = new int [poolsize];   \r
261            \r
262 }\r
263    \r
264    \r
265    \r
266 void FreeMemory(){\r
267         \r
268    delete [] genepool;\r
269    \r
270    if(recombVector!=NULL)delete [] recombVector;\r
271    \r
272 }\r
273    \r
274   \r
275 void FreeMutationMemory(){\r
276         \r
277    delete [] fragmentLength;\r
278    delete [] geneIndex;\r
279    delete [] branchMutation;\r
280    \r
281         \r
282 }\r
283 \r
284 void FreeCoalescentMemory(){\r
285         \r
286    delete [] MRCA;\r
287    delete [] MRCAtimes;\r
288 \r
289    delete [] fragments;\r
290 \r
291    FreeIntPtrMatrix(children,maxN);\r
292    \r
293    FreeIntPtrMatrix(parents,maxN);\r
294 \r
295    delete [] child_flags;\r
296    delete [] parent_flags;\r
297 \r
298    delete [] cblocks;\r
299    delete [] pblocks;\r
300    \r
301    delete [] parent_First;\r
302    delete [] parent_Last;\r
303 \r
304    delete [] child_First;\r
305    delete [] child_Last;\r
306         \r
307 }\r
308 \r
309 void reallocBlock(){\r
310         \r
311         pblocks = (int *)realloc(pblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
312         cblocks = (int *)realloc(cblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
313         BUFFER += INCREMENT;    \r
314         \r
315 }\r
316 \r
317 void setnull(int parent, int position){\r
318         \r
319   if (parent_flags[parent] == 0){\r
320                 parent_First[parent]=position / BLOCKSIZE;\r
321                 parent_Last[parent]=position / BLOCKSIZE;       \r
322                 parents[parent][position / BLOCKSIZE] = NULL;  \r
323   }\r
324   else if(parent_First[parent]>position / BLOCKSIZE){\r
325           for(int i=position / BLOCKSIZE;i<parent_First[parent];i++){\r
326                   parents[parent][i] = NULL;\r
327           }\r
328           parent_First[parent]=position / BLOCKSIZE;\r
329   }\r
330   else if(parent_Last[parent]<position / BLOCKSIZE){\r
331           for(int i=position / BLOCKSIZE;i>parent_Last[parent];i--){\r
332                   parents[parent][i] = NULL;\r
333           }\r
334           parent_Last[parent]=position / BLOCKSIZE;\r
335   }\r
336   \r
337 }\r
338 \r
339 \r
340 \r
341 void TouchParent(int parent, int position){\r
342 \r
343   \r
344   setnull(parent,position);\r
345                 \r
346   if (parents[parent][position / BLOCKSIZE] != NULL)return;\r
347  \r
348 \r
349   if (pblockcounter >= n*numBLOCKs+BUFFER){\r
350             \r
351             int * pblocks_old=pblocks;\r
352             int * cblocks_old=cblocks;\r
353             \r
354             reallocBlock();\r
355             \r
356             for(int i=0;i<N;i++)\r
357               if(parent_flags[i])\r
358                 for(int j=parent_First[i];j<=parent_Last[i];j++)\r
359                         if(parents[i][j]!=NULL)parents[i][j] += pblocks - pblocks_old;\r
360             \r
361             for(int i=0;i<N;i++)\r
362               if(child_flags[i])\r
363                 for(int j=child_First[i];j<=child_Last[i];j++)\r
364                         if(children[i][j]!=NULL)children[i][j] += cblocks - cblocks_old;\r
365                 \r
366                 printf("pblock and cblock enlarged, BUFFER=%d.!\n",BUFFER);\r
367                 \r
368                 if(pblocks==NULL || cblocks==NULL)error("Blocks memory reallocation error.\n");\r
369   }\r
370 \r
371   parents[parent][position / BLOCKSIZE] = pblocks + BLOCKSIZE * pblockcounter++;\r
372   \r
373   for (int i = 0; i < BLOCKSIZE; i++)\r
374       parents[parent] [position / BLOCKSIZE] [i] = -1;\r
375       \r
376   parent_flags[parent] = 1;\r
377   \r
378 }\r
379 \r
380 void Initialize(){\r
381         \r
382    genetimes = new int [poolsize];\r
383            \r
384    MRCA = new int [L];\r
385    MRCAtimes = new int [L];\r
386 \r
387    fragments = new int [L];\r
388 \r
389    children = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
390    parents = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
391    \r
392    parent_First = new int [maxN];\r
393    parent_Last = new int [maxN];\r
394 \r
395    child_First = new int [maxN];\r
396    child_Last = new int [maxN];\r
397 \r
398    cblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));\r
399    pblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE))); \r
400   \r
401    child_flags = new bool [maxN];\r
402    parent_flags = new bool [maxN];\r
403    \r
404    \r
405    noChange = true;\r
406         \r
407    nPOP = (int)(popProfile[0].popsize.size());\r
408    \r
409    N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
410    \r
411    currentProfile = 0;\r
412         \r
413    nextFree = 0;\r
414    \r
415    for (int i = 0; i < N; i++)\r
416       child_flags[i] = 0;\r
417      \r
418    int count=0;\r
419    for(int k=0;k<nPOP;k++){    \r
420            for (int i = 0; i < nSample[k]; i++)\r
421               {\r
422                   child_First[popProfile[0].popStart[k]+i]=0;\r
423                   child_Last[popProfile[0].popStart[k]+i]=numBLOCKs-1;\r
424 \r
425               child_flags[popProfile[0].popStart[k]+i] = 1;\r
426 \r
427               for (int j = 0; j < numBLOCKs; j++)\r
428                         children[popProfile[0].popStart[k]+i][j] = cblocks + (count * L + j * BLOCKSIZE);\r
429                 \r
430               for (int j = 0; j < L; j++)\r
431                  {\r
432                  genepool[nextFree] = 1;\r
433                  genetimes[nextFree] = 0;\r
434                  children[popProfile[0].popStart[k]+i][j / BLOCKSIZE][j % BLOCKSIZE] = nextFree++;\r
435                  }\r
436                  \r
437                  count++;\r
438               }\r
439    }\r
440 \r
441    for (int i = 0; i < L; i++)\r
442       fragments[i] = n;\r
443 \r
444    for (int i = 0; i < N; i++)\r
445       parent_flags[i] = 0;\r
446 \r
447    activeSegments = L;\r
448    \r
449    //for (int i = 0; i < N; i++)\r
450    //  for (int j = 0; j < numBLOCKs; j++)\r
451    //     parents[i][j] = NULL;\r
452       \r
453    pblockcounter = 0;\r
454  \r
455 }\r
456 \r
457 void InitializeMutation(vector< vector<bool> > &return_chromosome){\r
458         \r
459 //         return_chromosome.clear();\r
460 //    return_chromosome.resize(n);\r
461 //    for(int i=0;i<n;i++){\r
462 //         return_chromosome[i].reserve(max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP)));\r
463 //      }\r
464 //    \r
465 //    chromosome.resize(n);\r
466 //    for(int i=0;i<n;i++){\r
467 //              chromosome[i].resize(L);\r
468 //              for(int j=0;j<L;j++){\r
469 //                 chromosome[i][j].reserve(max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP)));\r
470 //              }\r
471 //    }\r
472         \r
473 //    cout<<"WGCS-- chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
474    \r
475    for(int i=0;i<n;i++){\r
476            for(int j=0;j<L;j++){\r
477                    chromosome[i][j].clear();\r
478            }\r
479    }\r
480    \r
481 //    cout<<"WGCS-- after clear() chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
482    \r
483    branchMutation = new int [nextFree];\r
484    for(int i=0;i<nextFree;i++)branchMutation[i]=0;\r
485 \r
486 }\r
487 \r
488 void PrepareMutation(){\r
489         \r
490    for(int i=0;i<L;i++)fragmentLength[i]=0;\r
491    \r
492    for(int i=0;i<nextFree;i++){\r
493            geneIndex[i] = -1; \r
494            branchFragment[i] = -1;\r
495    }    \r
496         \r
497 }\r
498    \r
499 int SampleParent(int individual, int previous){\r
500         \r
501         if(fixedPOP){      \r
502            int population = individual/popProfile[0].popsize[0]; \r
503            \r
504            if (previous == -1 && nPOP>1){\r
505               if (globalRandom.Next() < migration){\r
506                       int random = globalRandom.NextInt()%(nPOP-1); \r
507                   if(population<=random)population=random+1;\r
508                   else population=random;\r
509               }\r
510            }\r
511               \r
512            return globalRandom.NextInt() % (popProfile[0].popsize[0]) + popProfile[0].popStart[population];\r
513    }\r
514    else{\r
515            \r
516            if(noChange){\r
517                    int population=0; \r
518            \r
519                    for(int i=0;i<nPOP;i++){\r
520                            if(individual>=popProfile[currentProfile].popStart[i])population=i;\r
521                            else break;\r
522                    }\r
523                    \r
524 //                 cout<<"No change: ind="<<individual<<" pop="<<population<<endl;\r
525                    \r
526                    if (previous == -1 && nPOP>1){\r
527                       if (globalRandom.Next() < migration){\r
528                               int random = globalRandom.NextInt()%(nPOP-1); \r
529                           if(population<=random)population=random+1;\r
530                           else population=random;\r
531                       }\r
532                    }\r
533                    \r
534 //                 cout<<"new pop="<<population<<endl;\r
535                       \r
536                    return globalRandom.NextInt() % (popProfile[currentProfile].popsize[population]) + popProfile[currentProfile].popStart[population];\r
537            }\r
538            else{\r
539                    int population=0; \r
540            \r
541                    for(int i=0;i<nPOP;i++){\r
542                            if(individual>=popProfile[currentProfile].popStart[i])population=i; \r
543                            else break;\r
544                    }\r
545 //                 cout<<"Change: ind="<<individual<<" pop="<<population<<endl;\r
546                    \r
547                    if (previous == -1 && nPOP>1){\r
548                       if (globalRandom.Next() < migration){\r
549                               int random = globalRandom.NextInt()%(nPOP-1); \r
550                           if(population<=random)population=random+1;\r
551                           else population=random;\r
552                       }\r
553                    }\r
554                    \r
555 //                 cout<<"new pop="<<population<<endl;\r
556                    \r
557                    if(popProfile[currentProfile].popChange[population].size()==1){\r
558 //                         cout<<"To next pop="<<popProfile[currentProfile].popChange[population][0]<<endl;\r
559                            \r
560                                 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][0]]) \r
561                                                 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][0]];      \r
562                    }\r
563                    else{\r
564                                 double selectRandom = globalRandom.Next();\r
565                                 for(unsigned int i=0;i<popProfile[currentProfile].popChange[population].size();i++){\r
566                                         if(selectRandom < popProfile[currentProfile].selectProb[population][i]){\r
567                                                 \r
568 //                                              cout<<"To next pop="<<popProfile[currentProfile].popChange[population][i]<<endl;\r
569                                                 \r
570                                                 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][i]]) \r
571                                                 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][i]];\r
572                                         }       \r
573                                 }\r
574                                 cout<<"Random number generator error!"<<endl;\r
575                                 exit(1);\r
576                    }\r
577                    \r
578                    \r
579            }\r
580            \r
581    }\r
582 }\r
583 \r
584 void readPopProfile(string filename, unsigned int numPopulation){\r
585            \r
586            int popProfile_index=0;\r
587            \r
588            popProfile.clear();\r
589            struct popProfilesStruct temp_popProfile;\r
590            popProfile.push_back(temp_popProfile);       \r
591                                    \r
592            ifstream popFile;\r
593            string fileline;\r
594            char *term;\r
595            char *term1;\r
596            \r
597            popFile.open(filename.c_str());\r
598            if(!popFile)error("Population profile: %s cannot be opened!",filename.c_str());\r
599            \r
600            getline(popFile,fileline);                                   // the first line is the population sizes at generation 0\r
601            \r
602            term = strtok ((char*)fileline.c_str()," \t");  // the first term is the generation\r
603            popProfile[popProfile_index].generation=atoi(term);\r
604            \r
605            if(popProfile[popProfile_index].generation!=0)error("The first line in population profile should be generation 0!");\r
606            \r
607            term = strtok (NULL, " \t");                                    // the second term and after are the sizes of that population\r
608            while(term!=NULL){\r
609                            popProfile[popProfile_index].popsize.push_back(atoi(term));\r
610                            term = strtok (NULL, " \t");\r
611            }\r
612 \r
613            popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
614            for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
615                    popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
616            }\r
617                            \r
618            maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
619            \r
620            if(popProfile[0].popsize.size()!=numPopulation)error("The number of populations specified by -pop is not consistent with generation 0 in population profile:%s",filename.c_str());\r
621            \r
622            // read in the population correspondence to the next population profile\r
623            \r
624                    \r
625            while(popFile.peek()!=EOF){\r
626                    getline(popFile,fileline);                                           // the even number lines are the correspondence of populations of different generation\r
627                    popProfile[popProfile_index].popChange.resize(popProfile[popProfile_index].popsize.size());\r
628                    \r
629                    term = strtok ((char*)fileline.c_str(),"- \t");  // the FROM population\r
630                    while(term!=NULL){\r
631                            \r
632                            term1 = strtok (NULL, "- \t");                               // the TO population\r
633 \r
634                            popProfile[popProfile_index].popChange[atoi(term)-1].push_back(atoi(term1)-1);\r
635                            \r
636                            term = strtok (NULL, "- \t");\r
637                    }\r
638                    \r
639                    popProfile_index++;\r
640                    popProfile.push_back(temp_popProfile);\r
641                    \r
642                    if(popFile.peek()==EOF)error("Error in population profile: the last line should specify the population sizes.");\r
643                    \r
644                    // read the next population profile\r
645                    \r
646                    getline(popFile,fileline);                                   // the population sizes\r
647    \r
648                    term = strtok ((char*)fileline.c_str()," \t");  // the first term is the generation\r
649                    popProfile[popProfile_index].generation=atoi(term);\r
650                    \r
651                    term = strtok (NULL, " \t");                                    // the second term and after are the sizes of that population\r
652                    while(term!=NULL){\r
653                                    popProfile[popProfile_index].popsize.push_back(atoi(term));\r
654                                    term = strtok (NULL, " \t");\r
655                    }\r
656                    \r
657                    popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
658                    for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
659                            popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
660                    }\r
661                    \r
662                    if(maxN < popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back())\r
663                                         maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
664            }    \r
665            \r
666            for(unsigned int i=0;i<popProfile.size();i++){\r
667                    popProfile[i].selectProb.resize(popProfile[i].popChange.size());\r
668                    for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
669                            float sum=0.0;\r
670                            for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)sum += popProfile[i+1].popsize[popProfile[i].popChange[j][k]];\r
671                            \r
672                            popProfile[i].selectProb[j].push_back(((float)popProfile[i+1].popsize[popProfile[i].popChange[j][0]])/sum);\r
673                            for(unsigned int k=1;k<popProfile[i].popChange[j].size();k++)\r
674                                         popProfile[i].selectProb[j].push_back(popProfile[i].selectProb[j].back()+popProfile[i+1].popsize[popProfile[i].popChange[j][k]]/sum);\r
675                    }\r
676            }\r
677            // check for consistence\r
678            \r
679            int min, max;\r
680            \r
681            for(unsigned int i=0;i<popProfile.size()-1;i++){\r
682                         min=99999;max=-1;\r
683                         for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
684                                 if(popProfile[i].popChange[j].size()==0)error("One population does not have its parent population: profile file line %d, population %d\n",i+1,j+1);     \r
685                                 for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++){\r
686                                         if(min>popProfile[i].popChange[j][k])min=popProfile[i].popChange[j][k];\r
687                                         if(max<popProfile[i].popChange[j][k])max=popProfile[i].popChange[j][k];\r
688                                 }\r
689                         }   \r
690                         if(min!=0 || max!=(int)(popProfile[i+1].popsize.size()-1))\r
691                                 error("The population changes does not consistent with the next population profile. line=%d, min=%d, max=%d, num POP in next line=%d\n",i+1,min+1,max+1,popProfile[i+1].popsize.size());\r
692            }\r
693         \r
694 }\r
695 \r
696 \r
697 void readRecombination(string filename, int pieces){\r
698 \r
699            ifstream recombFile;\r
700            string fileline;\r
701            char *term;\r
702                 \r
703            vector<float> recDistribution;\r
704            vector<float> recRate;\r
705            \r
706            float sum=0.0;\r
707            float random;\r
708            \r
709            recombFile.open(filename.c_str());\r
710            if(!recombFile)error("Recombination profile: %s cannot be opened!",filename.c_str());\r
711            \r
712            getline(recombFile,fileline);                // the first line is head of distribution table\r
713            term = strtok ((char*)fileline.c_str()," \t");\r
714            term = strtok (NULL, " \t"); \r
715            \r
716            if(strcmp(term,"Freq")==0 || strcmp(term,"freq")==0){\r
717                    while(recombFile.peek()!=EOF){\r
718                            getline(recombFile,fileline);\r
719                            \r
720                            term = strtok ((char*)fileline.c_str()," \t"); // the first number is the recombination rate\r
721                            recRate.push_back((float)(atof(term)));\r
722                            \r
723                            if(atof(term)<0 || atof(term)>1)error("Recombination rate should be between 0 and 1. input=%f",atof(term));\r
724                            \r
725                            term = strtok (NULL, " \t");                                   // the second number is the frequency of recombination rate\r
726                            recDistribution.push_back((float)(atof(term)));\r
727                            \r
728                            if(atof(term)<=0)error("Frequency should be positive. input=%f",atof(term));\r
729                            \r
730                            sum += (float)(atof(term));\r
731                    }\r
732                    \r
733                    if(recRate.size()!=recDistribution.size() || recRate.size()==0)error("Errors in the recombination profile: %s",filename.c_str());\r
734                    \r
735                    // normalize the frequency \r
736                    for(unsigned int i=0;i<recDistribution.size();i++) recDistribution[i] /= sum;\r
737                    \r
738                    // compute the cumulative distribution\r
739                    for(unsigned int i=1;i<recDistribution.size();i++) recDistribution[i] += recDistribution[i-1];\r
740                    \r
741                    recombVector = new float [pieces-1];\r
742                    \r
743                    for(int i=0;i<pieces-1;i++){\r
744                            random = (float)(globalRandom.Next());\r
745                            for(unsigned int j=0;j<recDistribution.size();j++){\r
746                                         if(random < recDistribution[j]){\r
747                                                 recombVector[i]=recRate[j];\r
748                                                 break;   \r
749                                         }\r
750                            }\r
751                    }\r
752                    \r
753                         //check the input\r
754                         cout<<"*** RECOMBINATION RATES PROFILE ***"<<endl;\r
755                         cout<<"Recombination_Rate\tCumulative_frequency"<<endl;\r
756                         for(unsigned int i=0;i<recRate.size();i++){\r
757                                 cout<<recRate[i]<<"\t\t\t"<<recDistribution[i]<<endl;  \r
758                         }               \r
759                            \r
760            }\r
761            else if(strcmp(term,"Pos")==0 || strcmp(term,"pos")==0){\r
762                    float currentRec=0.0,nextRec=0.0;\r
763                    int nextPos=-1;\r
764                    \r
765                    if(recombFile.peek()!=EOF){\r
766                            getline(recombFile,fileline);\r
767                            term = strtok ((char*)fileline.c_str()," \t");\r
768                            currentRec=(float)(atof(term));\r
769                            \r
770                            if(recombFile.peek()!=EOF){\r
771                                    getline(recombFile,fileline);\r
772                                    term = strtok ((char*)fileline.c_str()," \t");\r
773                                nextRec=(float)(atof(term));\r
774                                term = strtok (NULL, " \t");     \r
775                                nextPos=atoi(term);\r
776                            }\r
777                            else{\r
778                                    nextPos=-1;\r
779                            }\r
780                            \r
781                    }\r
782                    else{\r
783                            error("Recombination file does not contain recombination rate information.\n");\r
784                    }\r
785                    \r
786                    recombVector = new float [pieces-1];\r
787                    \r
788                    for(int i=0;i<pieces-1;i++){\r
789                            if(i+1==nextPos){\r
790                                     currentRec=nextRec;\r
791                                     if(recombFile.peek()!=EOF){\r
792                                                 getline(recombFile,fileline);\r
793                                                 term = strtok ((char*)fileline.c_str()," \t");\r
794                                         nextRec=(float)(atof(term));\r
795                                         term = strtok (NULL, " \t");    \r
796                                         nextPos=atoi(term);\r
797                                         }\r
798                                         else{\r
799                                                 nextPos=-1;\r
800                                         }\r
801                                 }\r
802                                 \r
803                                 recombVector[i]=currentRec;\r
804                    }\r
805                    \r
806                    \r
807                    \r
808            }\r
809            else{\r
810                    error("The second column is not \"freq\" or \"pos\".\n");\r
811            }\r
812            \r
813            \r
814 \r
815            \r
816            cout<<"The recombination rates between fragments are:"<<endl<<"\t";\r
817            for(int i=0;i<pieces-1;i++)cout<<"("<<i+1<<") "<<recombVector[i]<<" ";\r
818            cout<<"("<<pieces<<")"<<endl;\r
819            cout<<"*** END OF RECOMBINATION RATES PROFILE ***"<<endl;\r
820                 \r
821 }\r
822 \r
823 void drawtrees(){\r
824         \r
825         map<int,string > tree;\r
826         ostringstream  temp;\r
827         \r
828                            \r
829         for(int i=0;i<L;i++){\r
830                 tree.clear();\r
831                 \r
832                 for(int j=0;j<n;j++){\r
833                         if(tree[genepool[j*L+i]]!=""){\r
834                                 temp.str("");\r
835                                 temp <<tree[genepool[j*L+i]]<<","<<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
836                                 tree[genepool[j*L+i]]=  temp.str();\r
837                         }\r
838                         else{\r
839                                 temp.str("");\r
840                                 temp <<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
841                                 tree[genepool[j*L+i]]=  temp.str();\r
842                         }\r
843                 }\r
844                 \r
845         \r
846                 for(int j=n*L;j<nextFree;j++){\r
847                         if(tree[j]!=""){\r
848                                 if(genepool[j]==0){\r
849                                         cout<<"The tree for fragment "<<i+1<<"= ("<<tree[j]<<");"<<endl;\r
850                                 }\r
851                                 else{                                                   \r
852                                         if(tree[genepool[j]]!=""){\r
853                                                 temp.str("");\r
854                                                 temp <<tree[genepool[j]]<<",("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
855                                                 tree[genepool[j]]=      temp.str();\r
856                                         }\r
857                                         else{\r
858                                                 temp.str("");\r
859                                                 temp <<"("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
860                                                 tree[genepool[j]]=temp.str();\r
861                                         }\r
862                                 }\r
863                         }\r
864                 }\r
865         }\r
866         \r
867         tree.clear();\r
868 }\r
869 \r
870 \r
871 \r
872 \r
873 void genome(string popSize,                                             // effective population size of each population\r
874                   int nSubPOP,                                                  // number of populations\r
875                   vector<int> & nSubSample,                     // number of samples (haplotypes) draw from each populations\r
876                   int numPieces,                                                // number of fragments for each sample (chromosome)\r
877                   int pieceLen,                                                 // length in base pair of each fragment\r
878                   int numIndepRegion,                                   // number of independent regions (independent chromosome)\r
879                   int s,                                                                // fixed number of SNPs want to simulate, randomly place s SNPs on the genealogy, -1 means the number of mutation follows a Poisson distribution \r
880                   string rec,                                                   // recombination rate between consecutive fragments per generation\r
881                   double mut,                                                   // mutation rate per generation per base pair\r
882                   double mig,                                                   // migration rate per generation\r
883                   vector< vector<bool> > &return_chromosome, // the return chromosome by connecting all independent chromosome into one long chromosome\r
884                   long t,                                                               // random seed \r
885                   int drawtree,                                         // drawtree=1 output the genealogy trees for each fragment and chromosome; drawtree=0 do not output the trees   \r
886                   bool printparameters                                  // =1 print out all input parameters, =0 do not print input parameters\r
887                   )\r
888 {   \r
889    if(atoi(popSize.c_str())>0){                 // if the population size is fixed during evoluation\r
890            N = atoi(popSize.c_str())*nSubPOP;\r
891            maxN = N;\r
892            \r
893            fixedPOP = true;\r
894            \r
895            popProfile.clear();\r
896            struct popProfilesStruct temp_popProfile;\r
897            popProfile.push_back(temp_popProfile);\r
898            \r
899            popProfile[0].generation = 0;\r
900            \r
901            for(int i=0;i<nSubPOP;i++)popProfile[0].popsize.push_back(atoi(popSize.c_str()));\r
902            for(int i=0;i<nSubPOP;i++)popProfile[0].popStart.push_back(i*atoi(popSize.c_str()));\r
903            \r
904            cout<<"*** POPULATION PROFILE ***"<<endl;\r
905            cout<<"The size of each population is fixed to "<<atoi(popSize.c_str())<<endl;\r
906            cout<<"Sizes of populations = ";\r
907            for(unsigned int j=0;j<popProfile[0].popsize.size();j++)cout<<popProfile[0].popsize[j]<<" ";\r
908            cout<<endl;\r
909 //         cout<<"POP Start index = ";\r
910 //         for(int j=0;j<popProfile[0].popStart.size();j++)cout<<popProfile[0].popStart[j]<<" ";\r
911 //         cout<<endl;\r
912            cout<<"*** END OF POPULATION PROFILE ***"<<endl<<endl;\r
913                         \r
914    }\r
915    else{                                                                        // popSize stores the filename of the population profile during evoluation\r
916            fixedPOP = false;      \r
917         \r
918            currentProfile = 0;\r
919            \r
920            readPopProfile(popSize,nSubPOP);\r
921            \r
922            N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
923            \r
924            // print out the population profile information.\r
925            \r
926            cout<<"*** POPULATION PROFILE ***"<<endl;\r
927            cout<<"Maximum total size of populations at all generations: N = "<<maxN<<endl;\r
928            cout<<"Total size of populations at generation 0: N = "<<N<<endl<<endl;\r
929            \r
930            for(unsigned int i=0;i<popProfile.size();i++){\r
931                         cout<<"Generation = "<<popProfile[i].generation<<endl;\r
932                         cout<<"Sizes of populations= ";\r
933                         for(unsigned int j=0;j<popProfile[i].popsize.size();j++)cout<<popProfile[i].popsize[j]<<" ";\r
934                         cout<<endl;\r
935 //                      cout<<"POP Start index = ";\r
936 //                      for(int j=0;j<popProfile[i].popStart.size();j++)cout<<popProfile[i].popStart[j]<<" ";\r
937 //                      cout<<endl;\r
938                         if(popProfile[i].popChange.size()>0){\r
939                                 cout<<"Populations change backward in time:"<<endl;\r
940                                 for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
941                                         cout<<" From "<<j+1<<" to ";\r
942                                         for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)cout<<popProfile[i].popChange[j][k]+1<<" ";\r
943                                         cout<<endl;\r
944                                 }\r
945                                 \r
946 //                              cout<<"Selection cumulative probability:"<<endl;   \r
947 //                              for(int j=0;j<popProfile[i].selectProb.size();j++){\r
948 //                                      cout<<" From "<<j+1<<" with prob = ";\r
949 //                                      for(int k=0;k<popProfile[i].selectProb[j].size();k++)cout<<popProfile[i].selectProb[j][k]<<" ";\r
950 //                                      cout<<endl;\r
951 //                              }\r
952                         }\r
953                         if(i==popProfile.size()-1)cout<<"*** END OF POPULATION PROFILE ***"<<endl;\r
954                         cout<<endl;\r
955            }\r
956    }\r
957    \r
958    n=0;\r
959    nPOP=nSubPOP;\r
960    nSample.resize(nPOP);\r
961    for(int i=0;i<nPOP;i++){\r
962            if(popProfile[0].popsize[i]<nSubSample[i])error("Population size should be larger than sample size!");\r
963            n += nSubSample[i];\r
964            nSample[i]=nSubSample[i];\r
965    }\r
966    \r
967    L = numPieces;\r
968    length = pieceLen;\r
969    numChr = numIndepRegion;\r
970    SNP=s;\r
971    \r
972    if(atof(rec.c_str())>0){                             // if the recombination rate is fixed\r
973            recombination = atof(rec.c_str());\r
974    }\r
975    else{                                                // the distribution of recombination rate is described in the file (filename stored in rec)\r
976            recombination = -1;\r
977            readRecombination(rec,L);\r
978    }\r
979    \r
980    \r
981    \r
982    mutation = mut;\r
983    migration = mig;\r
984    \r
985    if(t<=0)t=(long)(time(0)); // if t >0 we set the seed to t\r
986    cout<<endl<<"random seed="<<t<<endl;\r
987    globalRandom.Reset(t);       \r
988    \r
989    if(printparameters){\r
990            printf("popSize=%s, n=%d, nPOP=%d, L=%d, length=%d, numChr=%d, SNP=%d, rec=%s, mut=%.4e, mig=%.4e, drawtree=%d, printparameters=%d\n",popSize.c_str(),n,nPOP,L,length,numChr,SNP,rec.c_str(),mut,mig,drawtree,printparameters);\r
991            cout<<"subSample = ";\r
992            for(int i=0;i<nPOP;i++){\r
993                    cout<<nSample[i]<<" ";\r
994            }\r
995            cout<<endl;\r
996    }\r
997    \r
998    if(N<=0 || n<=0 || L<=0 || length <=0 || numChr <=0 || atof(rec.c_str()) <0 || mut <0 || mig<0){\r
999            cout<<"Input parameters have to be positive."<<endl;\r
1000            printf("N=%d, n=%d, L=%d, length=%d, numChr=%d, recombination=%s, mutation=%lf, migration=%lf\n",N,n,L,length,numChr,rec.c_str(),mut,mig);\r
1001            exit(0);\r
1002    }\r
1003 \r
1004    poolsize = 2 * n * L - L;\r
1005 \r
1006    AllocateMemory(return_chromosome);\r
1007 \r
1008 //    cout<<"memory OK!"<<endl<<endl;\r
1009    \r
1010         for(int chr=0;chr<numChr;chr++){\r
1011                 \r
1012            Initialize();\r
1013            \r
1014 //         cout<<"Initialization OK!"<<endl;\r
1015         \r
1016            bool done = false;\r
1017            int  generation = 0, units = n * L;\r
1018            \r
1019            while (!done)\r
1020               {\r
1021         //       printf("\n\nGeneration %d -- Tracking %d units of sequence [pool used %.2f%%]   %c",\r
1022         //              generation++, units, nextFree * 100. / poolsize,\r
1023         //              !debug ? '\r' : '\n');\r
1024         \r
1025                   generation++;\r
1026                   \r
1027                   if(!fixedPOP && currentProfile<popProfile.size()-1)\r
1028                                 if(generation == popProfile[currentProfile+1].generation)noChange=false;\r
1029                   \r
1030               //if (debug)\r
1031               //   {\r
1032               //   printf("F  : ");\r
1033               //   for (int i = 0; i < L; i++)\r
1034               //      printf("%3d ", fragments[i]);\r
1035               //   printf("\n");\r
1036               //   }\r
1037         \r
1038               // Position of the first segment allocated to the current generation\r
1039               int generationStart = nextFree;\r
1040         \r
1041               // For each individual in the population\r
1042               for (int i = 0; i < N && !done; i++)\r
1043                  if (child_flags[i])\r
1044                     {\r
1045                             \r
1046                     /*if (debug) printf("%2d : ", i);*/\r
1047         \r
1048                     // The position we are currently tracking and its distance\r
1049                     // from the previous position\r
1050                     int distance = 0, parent = -1;\r
1051                     int startPiece = 0;\r
1052                     float recombProb;\r
1053            \r
1054                             for (int block = child_First[i]; block <= child_Last[i]; block++)\r
1055                 if (children[i][block] == NULL)\r
1056                    distance += BLOCKSIZE;\r
1057                 else\r
1058                    {\r
1059                    int pos = BLOCKSIZE * block;\r
1060                    int pos_in_block;\r
1061                                    int pos_bound = (L < BLOCKSIZE * (block + 1) ? L : BLOCKSIZE * (block + 1) );\r
1062 \r
1063                    while (pos <  pos_bound )\r
1064                        {\r
1065                                \r
1066                            pos_in_block = pos % BLOCKSIZE;\r
1067                        // Skip through uninformative positions along each chromosome\r
1068                        if (children[i][block][pos_in_block] == -1)\r
1069                           {\r
1070                           //if (debug) printf("  . \n");\r
1071                           distance++;\r
1072                           pos++;\r
1073                           continue;\r
1074                           }\r
1075                          \r
1076                        // Pick an ancestor at random for the current piece of chromosome\r
1077                        \r
1078                        if(recombination>0){                     // if the recombination rate is fixed over the genome\r
1079                                recombProb = (float)(pow(1.0 - recombination, distance));\r
1080                        }\r
1081                        else{                                            // if the recombination rate is specified by the profile\r
1082                                recombProb = 1;\r
1083                                for(int recV_iter=startPiece;recV_iter<pos;recV_iter++)recombProb *= (1-recombVector[recV_iter]);\r
1084                        }\r
1085                        \r
1086                     \r
1087                        if (parent == -1 ||\r
1088                            (distance > 0 && globalRandom.Next() > recombProb)){\r
1089                                     parent = SampleParent(i, parent);\r
1090                         }\r
1091                             \r
1092                        \r
1093                        TouchParent(parent, pos);\r
1094                        \r
1095                        \r
1096                        // Reset distance between segments\r
1097                        distance = 1;\r
1098                        startPiece=pos;\r
1099 \r
1100                        //if (debug) printf("parent=%3d pos=%d address=%d address2=%d\n", parent,pos,parents[parent][block],parents[parent][pos/BLOCKSIZE]);\r
1101                        \r
1102                        // Check if we sampled a new ancestral segment ...\r
1103                        if (parents[parent][block][pos_in_block] == -1)\r
1104                           {\r
1105                           // We delay sampling a new gene from the pool until we know\r
1106                           // a coalescent event occured ...\r
1107                           parents[parent][block][pos_in_block] = children[i][block][pos_in_block];\r
1108                           \r
1109                           }\r
1110                        // Or if a coalescent event occured\r
1111                        else\r
1112                           {\r
1113                           // If we previously delayed sampling this parent\r
1114                           if (parents[parent][block][pos_in_block] < generationStart)\r
1115                             {\r
1116                                     genetimes[nextFree] = generation;\r
1117                                     genepool[parents[parent][block][pos_in_block]] = nextFree;\r
1118                                     parents[parent][block][pos_in_block] = nextFree++;\r
1119 \r
1120                                     if (nextFree == poolsize && units != 2)\r
1121                                        error("Genepool exhausted\n");\r
1122                             }\r
1123 \r
1124                           genepool[children[i][block][pos_in_block]] = parents[parent][block][pos_in_block];\r
1125                           \r
1126                           fragments[pos]--;\r
1127                           units--;\r
1128 \r
1129                          if (fragments[pos] == 1)\r
1130                             {\r
1131                             // Reached the MRCA for this fragment\r
1132                             MRCAtimes[pos] = generation;\r
1133                             MRCA[pos] = parents[parent][block][pos_in_block];\r
1134                             \r
1135                             genepool[parents[parent][block][pos_in_block]]=0;\r
1136                             \r
1137                             parents[parent][block][pos_in_block] = -1;\r
1138                             units--;\r
1139          \r
1140                             // One less segment to track\r
1141                             if (--activeSegments == 0)\r
1142                                {\r
1143                                done = true;\r
1144                                break;\r
1145                                }\r
1146                             }\r
1147                          }\r
1148                       pos++;\r
1149                       }\r
1150                    }\r
1151                     }\r
1152                     \r
1153               NewGeneration();\r
1154             \r
1155               }\r
1156         \r
1157            if(drawtree){\r
1158                    cout<<endl<<"Genealogy trees for chromosome:"<<chr+1<<endl;\r
1159                    drawtrees();\r
1160            }  \r
1161               \r
1162            // assign mutations\r
1163 \r
1164            cout<<"Simulate mutation for Chromosome "<<chr+1<<endl;\r
1165            \r
1166            FreeCoalescentMemory();\r
1167            \r
1168            cout<<"Coalescent Memory free"<<endl;\r
1169            \r
1170            InitializeMutation(return_chromosome);\r
1171 \r
1172            cout<<"Mutation process initialized"<<endl;\r
1173                 \r
1174 //          cout<<"genepool:\t";\r
1175 //              for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
1176 //              cout<<endl;\r
1177 //              \r
1178 //              cout<<"genetimes:\t";\r
1179 //              for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
1180 //              cout<<endl;\r
1181 //         \r
1182 //              cout<<"nextFree="<<nextFree<<endl;\r
1183                 \r
1184 //              cout<<"mutation:\t";\r
1185 //              for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
1186 //              cout<<endl;\r
1187                 \r
1188            if(SNP<=0){\r
1189                    for(int i=0;i<nextFree;i++){\r
1190                                 if(genepool[i]!=0)branchMutation[i]=poissonInt((double)length*(double)(genetimes[genepool[i]]-genetimes[i])*mutation);   \r
1191                    }\r
1192            }\r
1193            else{\r
1194                    unsigned long * cumulativeCount = new unsigned long [nextFree];\r
1195                    \r
1196                    if(genepool[0]!=0)cumulativeCount[0] = genetimes[genepool[0]]-genetimes[0];\r
1197                    else cumulativeCount[0] = 0;\r
1198                    \r
1199                    for(int i=1;i<nextFree;i++){\r
1200                                 if(genepool[i]!=0)cumulativeCount[i] = cumulativeCount[i-1] + genetimes[genepool[i]]-genetimes[i]; \r
1201                                 else cumulativeCount[i] = cumulativeCount[i-1];  \r
1202                    }\r
1203                    \r
1204                    unsigned long totalGeneration = cumulativeCount[nextFree - 1];\r
1205                    \r
1206 //                 cout<<"Total Generation = "<<totalGeneration<<endl;\r
1207 //                 \r
1208 //                 if((unsigned long)SNP>totalGeneration){\r
1209 //                              cout<<"Required number of SNPs("<<SNP<<") is more than total number of generations("<<totalGeneration<<")!"<<endl;   \r
1210 //                 }\r
1211                    \r
1212                    for(int i=0;i<SNP;i++){\r
1213                            \r
1214                            unsigned long randomLong = globalRandom.NextInt()%totalGeneration;\r
1215                            \r
1216                            int begin = 0;\r
1217                            int end = nextFree - 1;\r
1218                            int middle = (begin+end)/2;\r
1219                            \r
1220                            while(1){\r
1221                                    if(cumulativeCount[middle]<randomLong){\r
1222                                                 begin = middle;\r
1223                                    }\r
1224                                    else{\r
1225                                                 end = middle;\r
1226                                    }\r
1227                                    \r
1228                                    if(end == begin+1)break;\r
1229                                    \r
1230                                    middle = (begin+end)/2;\r
1231                            }\r
1232                            \r
1233                            if(cumulativeCount[end] < randomLong || (cumulativeCount[begin] >= randomLong && begin != 0)){\r
1234                                         cout<<"searching error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;   \r
1235                                         error("see above message!\n");\r
1236                            }\r
1237                            \r
1238                            int sampleIndex;\r
1239                            \r
1240                            if(cumulativeCount[begin]>=randomLong)sampleIndex = begin;\r
1241                            else sampleIndex = end;\r
1242                            \r
1243                            if(genepool[sampleIndex]==0){\r
1244                                    cout<<"genepool error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;   \r
1245                                    error("see above message!\n");\r
1246                            }\r
1247                            \r
1248                            branchMutation[sampleIndex]++;\r
1249                    }\r
1250                    \r
1251                    delete [] cumulativeCount;\r
1252                    \r
1253            }\r
1254    \r
1255            delete [] genetimes;\r
1256            \r
1257            cout<<"Mutation assigned"<<endl;\r
1258             \r
1259 //      cout<<"mutation:\t";\r
1260 //              for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
1261 //              cout<<endl;\r
1262 \r
1263            AllocateMutationMemory();\r
1264            \r
1265            cout<<"Mutation Memory allocated"<<endl;\r
1266                 \r
1267            PrepareMutation();\r
1268         \r
1269            for(int i=0;i<n;i++){\r
1270                         for(int j=0;j<L;j++){\r
1271                                 if(branchMutation[i*L+j]>0){\r
1272                                         geneIndex[i*L+j] = fragmentLength[j];\r
1273                                         fragmentLength[j] += branchMutation[i*L+j];\r
1274                                 }\r
1275                                 branchFragment[i*L+j] = j;\r
1276                         }\r
1277            }   \r
1278               \r
1279            \r
1280            for(int i=0;i<nextFree;i++){\r
1281                         if(genepool[genepool[i]]!=0 && genepool[i]!=0){\r
1282                            if(branchMutation[genepool[i]]>0 && branchFragment[genepool[i]]== -1 ){\r
1283                                    geneIndex[genepool[i]] = fragmentLength[branchFragment[i]];\r
1284                                    fragmentLength[branchFragment[i]] += branchMutation[genepool[i]];\r
1285                            }\r
1286                            branchFragment[genepool[i]]=branchFragment[i];\r
1287                         }\r
1288            }\r
1289            \r
1290            //cout<<"branchfragment done"<<endl;\r
1291            \r
1292 //      cout<<"branchFragment:\t";\r
1293 //              for(int i=0;i<nextFree;i++)cout<<branchFragment[i]<<" ";\r
1294 //              cout<<endl;\r
1295 \r
1296 //      cout<<"geneIndex:\t";\r
1297 //              for(int i=0;i<nextFree;i++)cout<<geneIndex[i]<<" ";\r
1298 //              cout<<endl;\r
1299 //      \r
1300 //              cout<<"fragmentLength:\t";\r
1301 //              for(int i=0;i<L;i++)cout<<fragmentLength[i]<<" ";\r
1302 //              cout<<endl;\r
1303                 \r
1304 \r
1305 //          cout<<"genepool genetime mutation frag index\n";\r
1306 //              for(int i=0;i<nextFree;i++)cout<<genepool[i]<<" "<<genetimes[i]<<" "<<branchMutation[i]<<" "<<branchFragment[i]<<" "<<geneIndex[i]<<endl;\r
1307 //              cout<<endl;\r
1308            \r
1309 \r
1310    \r
1311        delete [] branchFragment;\r
1312        \r
1313            cout<<"Internal memory free"<<endl;\r
1314            \r
1315            // print the fragment index for each SNP\r
1316            cout<<"SNP genetic position scaled to [0,1]:"<<endl;\r
1317            if(L>1){\r
1318                    for(int i=0;i<L;i++){\r
1319                                 for(int j=0;j<fragmentLength[i];j++)cout<<(float)i/(float)(L-1)<<" ";   \r
1320                    }\r
1321            }\r
1322            else{\r
1323                    cout<<"Only one fragment to be simulated. No recombination between SNPs.";\r
1324            }\r
1325            cout<<endl;\r
1326                    \r
1327            int totalLength=0;\r
1328            if(SNP <=0){ \r
1329                    for(int i=0;i<L;i++)totalLength += fragmentLength[i];\r
1330            }\r
1331            \r
1332            \r
1333            for(int i=0;i<n;i++){\r
1334                         for(int j=0;j<L;j++){\r
1335                                 chromosome[i][j].resize(fragmentLength[j],0);\r
1336 \r
1337                                 for(int k=0;k<branchMutation[i*L+j];k++)chromosome[i][j][geneIndex[i*L+j]+k] = 1;\r
1338                                 \r
1339                                 int current=i*L+j;\r
1340                 \r
1341                                 while(genepool[genepool[current]]!=0){\r
1342                                         for(int k=0;k<branchMutation[genepool[current]];k++)chromosome[i][j][geneIndex[genepool[current]]+k]=1;\r
1343                                         current=genepool[current];\r
1344                                 }\r
1345                         }\r
1346            }\r
1347            \r
1348            cout<<"Chromosome done"<<endl;\r
1349            \r
1350            FreeMutationMemory();\r
1351            \r
1352            if(SNP <=0){ \r
1353                    for(int i=0;i<n;i++){\r
1354                                 return_chromosome[i].reserve(return_chromosome[i].size()+totalLength);\r
1355                    }\r
1356            }\r
1357            \r
1358            for(int i=0;i<n;i++){\r
1359                         for(int j=0;j<L;j++){\r
1360                                 return_chromosome[i].insert(return_chromosome[i].end(),chromosome[i][j].begin(),chromosome[i][j].end());\r
1361                         }\r
1362            }     \r
1363            \r
1364            cout<<"Return chromosome done"<<endl;\r
1365            \r
1366            // print the chromosomes\r
1367 //         for(int i=0;i<n;i++){\r
1368 //                      printf("Chr %d: ",i);\r
1369 //                      for(int j=0;j<L;j++){\r
1370 //                              for(int k=0;k<chromosome[i][j].size();k++)cout<<chromosome[i][j][k];\r
1371 //                              cout<<" ";      \r
1372 //                      }\r
1373 //                      cout<<endl;\r
1374 //         }       \r
1375 //         \r
1376 //         \r
1377 //        for(int i=0;i<n;i++){\r
1378 //                      printf("return Chr %d: ",i);\r
1379 //                      for(int j=0;j<return_chromosome[i].size();j++){\r
1380 //                              cout<<return_chromosome[i][j];\r
1381 //                      }\r
1382 //                      cout<<endl;\r
1383 //         }\r
1384 \r
1385            \r
1386    \r
1387    }\r
1388    \r
1389    FreeMemory();\r
1390    \r
1391    printf("\nDone simulating ARG ...\n");\r
1392    \r
1393    \r
1394    \r
1395 }\r
1396 \r
1397 \r