]> git.donarmstrong.com Git - genome.git/blob - genome.cpp
add cxflags and debugging flags
[genome.git] / genome.cpp
1 ////////////////////////////////////////////////////////////////////// \r
2 // genome.cpp\r
3 //////////////////////////////////////////////////////////////////////////////\r
4 //              COPYRIGHT NOTICE FOR GENOME CODE\r
5 //\r
6 // Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
7 // Version 0.2\r
8 // All rights reserved.\r
9 //\r
10 //   Redistribution and use in source and binary forms, with or without\r
11 //   modification, are permitted provided that the following conditions\r
12 //   are met:\r
13 //\r
14 //     1. Redistributions of source code must retain the above copyright\r
15 //        notice, this list of conditions and the following disclaimer.\r
16 //\r
17 //     2. Redistributions in binary form must reproduce the above copyright\r
18 //        notice, this list of conditions and the following disclaimer in the\r
19 //        documentation and/or other materials provided with the distribution.\r
20 //\r
21 //     3. The names of its contributors may not be used to endorse or promote\r
22 //        products derived from this software without specific prior written\r
23 //        permission.\r
24 //\r
25 //   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
26 //   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
27 //   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
28 //   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
29 //   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
30 //   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
31 //   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
32 //   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
33 //   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
34 //   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
35 //   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
36 //\r
37 ///////////////////////////////////////////////////////////////////////////////\r
38 \r
39 \r
40 #define _CRT_SECURE_NO_DEPRECATE \r
41 \r
42 #include "Random.h"\r
43 #include "Error.h"\r
44 #include "stochastic.h" \r
45 #include <iostream>\r
46 #include <fstream>\r
47 #include <math.h>\r
48 #include <vector> \r
49 #include <time.h>\r
50 #include <algorithm>\r
51 #include <map>\r
52 #include <sstream>\r
53 #include <stdio.h>\r
54 #include <string.h>\r
55 \r
56 using namespace std;\r
57 \r
58 //static bool debug = false;\r
59 static bool fixedPOP = true;\r
60 static bool noChange = true;\r
61 \r
62 \r
63 static int N = 10000, n = 100, L = 1, poolsize = 1024 * 1024 * 64, length=10000, numChr=1, SNP=-1; // length = number of base per fragment\r
64 static int maxN;\r
65 static unsigned int currentProfile;\r
66 static int nPOP=1;\r
67 static vector<int> nSample(nPOP,3);\r
68 static double recombination = 1e-8, migration = 0.0, mutation=1e-8;\r
69 \r
70 static int * genepool, * genetimes, * fragments, * MRCA, * MRCAtimes;\r
71 static int *** children, *** parents, * cblocks, * pblocks;\r
72 static int * parent_First, * parent_Last, * child_First, * child_Last;\r
73 static bool * child_flags, * parent_flags;\r
74 static int nextFree = 0, activeSegments = 0, pblockcounter = 0;\r
75 static int numBLOCKs;\r
76 static int BUFFER=1500;\r
77 static int INCREMENT=1000;\r
78 \r
79 #define BLOCKSIZE   128\r
80 #define SWAP(x,y) do {typeof(x) temp##x##y = x; x = y; y = temp##x##y; } while (0)\r
81 \r
82 static vector< vector< vector<bool> > > chromosome;\r
83 static int * fragmentLength;\r
84 static int * branchFragment;\r
85 static int * geneIndex;\r
86 static int * branchMutation;\r
87 \r
88 static float * recombVector=NULL;\r
89 \r
90 struct popProfilesStruct{\r
91         int generation;\r
92         vector<int> popsize;\r
93         vector<int> popStart;\r
94         vector< vector<float> > selectProb;\r
95         vector< vector<int> > popChange;\r
96 };\r
97 \r
98 static vector<struct popProfilesStruct> popProfile;\r
99 \r
100 void print(){\r
101         cout<<"\n*****************\nParents:\t";\r
102         for(int i=0;i<N;i++){\r
103                 if(parent_flags[i]){\r
104                         for(int j=0;j<L;j++) cout<<"["<<((parents[i][j / BLOCKSIZE]==NULL)?999:(parents[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
105                         cout<<"\t";     \r
106                 }\r
107         }       \r
108         cout<<endl;\r
109         \r
110         cout<<"Children:\t";\r
111         for(int i=0;i<N;i++){\r
112                 if(child_flags[i]){\r
113                         for(int j=0;j<L;j++) cout<<"["<<((children[i][j / BLOCKSIZE]==NULL)?999:(children[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
114                         cout<<"\t";     \r
115                 }\r
116         }       \r
117         cout<<endl;\r
118         \r
119         \r
120         \r
121 //      cout<<"parent_flag:\t";\r
122 //      for(int i=0;i<N;i++){\r
123 //              cout<<parent_flags[i]<<"\t";\r
124 //      }       \r
125 //      cout<<endl;\r
126 //      \r
127 //      cout<<"children_flag:\t";\r
128 //      for(int i=0;i<N;i++){\r
129 //              cout<<child_flags[i]<<"\t";\r
130 //      }       \r
131 //      cout<<endl;\r
132         \r
133         cout<<"genepool:\t";\r
134         for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
135         cout<<endl;\r
136         \r
137         cout<<"genetimes:\t";\r
138         for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
139         cout<<endl;\r
140         \r
141         cout<<"MCRA:\t";\r
142         for(int i=0;i<L;i++)cout<<MRCA[i]<<" ";\r
143         cout<<endl;\r
144         \r
145         cout<<"MCRAtime:\t";\r
146         for(int i=0;i<L;i++)cout<<MRCAtimes[i]<<" ";\r
147         cout<<endl;\r
148         \r
149         cout<<"fragments:\t";\r
150         for(int i=0;i<L;i++)cout<<fragments[i]<<" ";\r
151         cout<<endl;\r
152         \r
153         cout<<"NextFree="<<nextFree<<endl;\r
154 }\r
155 \r
156 int *** AllocateIntPtrMatrix(int rows, int columns){\r
157    int *** result = new int ** [rows];\r
158 \r
159    for (int i = 0; i < rows; i++)\r
160       result[i] = new int * [columns];\r
161   \r
162    return result;\r
163 }\r
164    \r
165 void FreeIntPtrMatrix(int *** & matrix, int rows){\r
166         \r
167    for (int i = 0; i < rows; i++)\r
168       delete [] matrix[i];\r
169 \r
170    delete [] matrix;\r
171 \r
172    matrix = NULL;\r
173 }   \r
174    \r
175 void NewGeneration(){\r
176         \r
177 //    for(int i=0;i<maxN;i++){\r
178 //      if(parent_flags[i])printf("numBlock=%d parent=%d first=%d last=%d\n",numBLOCKs,i,parent_First[i],parent_Last[i]);   \r
179 //    }\r
180    \r
181    SWAP(children, parents);\r
182    SWAP(child_First, parent_First);\r
183    SWAP(child_Last, parent_Last);\r
184    SWAP(child_flags, parent_flags);\r
185    SWAP(cblocks, pblocks);\r
186    \r
187    if(!noChange){\r
188            currentProfile++;\r
189            nPOP = (int)(popProfile[currentProfile].popsize.size());\r
190            N = popProfile[currentProfile].popStart.back()+popProfile[currentProfile].popsize.back();\r
191            noChange = true;\r
192    }\r
193 \r
194    for (int i = 0; i < maxN; i++)\r
195       parent_flags[i] = 0;\r
196       \r
197 //    for (int i = 0; i < maxN; i++)\r
198 //        for (int j = 0; j < numBLOCKs; j++)\r
199 //            parents[i][j] = NULL;\r
200       \r
201    pblockcounter = 0;\r
202 \r
203 }\r
204 \r
205    \r
206 void AllocateMemory(vector< vector<bool> > &return_chromosome){\r
207    \r
208    \r
209 //    if(SNP<=0){\r
210 //              cout<<"Reserve size for return_chromosome="<<max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP))<<endl;\r
211 //    }\r
212 //    else{\r
213 //              cout<<"Reserve size for return_chromosome="<<SNP*numChr<<endl;\r
214 //    }\r
215 //    cout<<"Reserve size for working chromosome="<<max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP))<<endl;\r
216    \r
217  \r
218    return_chromosome.clear();\r
219    return_chromosome.resize(n);\r
220 //    for(int i=0;i<n;i++){\r
221 //         if(SNP<=0){\r
222 //                      //return_chromosome[i].reserve(max(1,40*(int)(mutation*(double)numChr*(double)L*(double)length*(double)maxN/(double)nPOP)));\r
223 //         }\r
224 //         else{\r
225 //                      //return_chromosome[i].reserve(SNP*numChr);\r
226 //         }\r
227 //      }\r
228    \r
229    chromosome.resize(n);\r
230    for(int i=0;i<n;i++){\r
231                 chromosome[i].resize(L);\r
232 //              for(int j=0;j<L;j++){\r
233 //                 if(SNP<=0){\r
234 //                         //chromosome[i][j].reserve(max(1,40*(int)(mutation*(double)length*(double)maxN/(double)nPOP)));\r
235 //                 }\r
236 //                 else{\r
237 //                         //chromosome[i][j].reserve(SNP/L+1);\r
238 //                 }\r
239 //              }\r
240    }\r
241   \r
242    genepool = new int [poolsize];\r
243 \r
244    numBLOCKs= (int)ceil((double)L / (double)BLOCKSIZE);\r
245  \r
246    pblockcounter = 0;\r
247 \r
248 }\r
249 \r
250 \r
251 void AllocateMutationMemory(){\r
252         \r
253         fragmentLength = new int [L];\r
254         \r
255         branchFragment = new int [poolsize];\r
256         \r
257         geneIndex = new int [poolsize];   \r
258            \r
259 }\r
260    \r
261    \r
262    \r
263 void FreeMemory(){\r
264         \r
265    delete [] genepool;\r
266    \r
267    if(recombVector!=NULL)delete [] recombVector;\r
268    \r
269 }\r
270    \r
271   \r
272 void FreeMutationMemory(){\r
273         \r
274    delete [] fragmentLength;\r
275    delete [] geneIndex;\r
276    delete [] branchMutation;\r
277    \r
278         \r
279 }\r
280 \r
281 void FreeCoalescentMemory(){\r
282         \r
283    delete [] MRCA;\r
284    delete [] MRCAtimes;\r
285 \r
286    delete [] fragments;\r
287 \r
288    FreeIntPtrMatrix(children,maxN);\r
289    \r
290    FreeIntPtrMatrix(parents,maxN);\r
291 \r
292    delete [] child_flags;\r
293    delete [] parent_flags;\r
294 \r
295    delete [] cblocks;\r
296    delete [] pblocks;\r
297    \r
298    delete [] parent_First;\r
299    delete [] parent_Last;\r
300 \r
301    delete [] child_First;\r
302    delete [] child_Last;\r
303         \r
304 }\r
305 \r
306 void reallocBlock(){\r
307         \r
308         pblocks = (int *)realloc(pblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
309         cblocks = (int *)realloc(cblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
310         BUFFER += INCREMENT;    \r
311         \r
312 }\r
313 \r
314 void setnull(int parent, int position){\r
315         \r
316   if (parent_flags[parent] == 0){\r
317                 parent_First[parent]=position / BLOCKSIZE;\r
318                 parent_Last[parent]=position / BLOCKSIZE;       \r
319                 parents[parent][position / BLOCKSIZE] = NULL;  \r
320   }\r
321   else if(parent_First[parent]>position / BLOCKSIZE){\r
322           for(int i=position / BLOCKSIZE;i<parent_First[parent];i++){\r
323                   parents[parent][i] = NULL;\r
324           }\r
325           parent_First[parent]=position / BLOCKSIZE;\r
326   }\r
327   else if(parent_Last[parent]<position / BLOCKSIZE){\r
328           for(int i=position / BLOCKSIZE;i>parent_Last[parent];i--){\r
329                   parents[parent][i] = NULL;\r
330           }\r
331           parent_Last[parent]=position / BLOCKSIZE;\r
332   }\r
333   \r
334 }\r
335 \r
336 \r
337 \r
338 void TouchParent(int parent, int position){\r
339 \r
340   \r
341   setnull(parent,position);\r
342                 \r
343   if (parents[parent][position / BLOCKSIZE] != NULL)return;\r
344  \r
345 \r
346   if (pblockcounter >= n*numBLOCKs+BUFFER){\r
347             \r
348             int * pblocks_old=pblocks;\r
349             int * cblocks_old=cblocks;\r
350             \r
351             reallocBlock();\r
352             \r
353             for(int i=0;i<N;i++)\r
354               if(parent_flags[i])\r
355                 for(int j=parent_First[i];j<=parent_Last[i];j++)\r
356                         if(parents[i][j]!=NULL)parents[i][j] += pblocks - pblocks_old;\r
357             \r
358             for(int i=0;i<N;i++)\r
359               if(child_flags[i])\r
360                 for(int j=child_First[i];j<=child_Last[i];j++)\r
361                         if(children[i][j]!=NULL)children[i][j] += cblocks - cblocks_old;\r
362                 \r
363                 printf("pblock and cblock enlarged, BUFFER=%d.!\n",BUFFER);\r
364                 \r
365                 if(pblocks==NULL || cblocks==NULL)error("Blocks memory reallocation error.\n");\r
366   }\r
367 \r
368   parents[parent][position / BLOCKSIZE] = pblocks + BLOCKSIZE * pblockcounter++;\r
369   \r
370   for (int i = 0; i < BLOCKSIZE; i++)\r
371       parents[parent] [position / BLOCKSIZE] [i] = -1;\r
372       \r
373   parent_flags[parent] = 1;\r
374   \r
375 }\r
376 \r
377 void Initialize(){\r
378         \r
379    genetimes = new int [poolsize];\r
380            \r
381    MRCA = new int [L];\r
382    MRCAtimes = new int [L];\r
383 \r
384    fragments = new int [L];\r
385 \r
386    children = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
387    parents = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
388    \r
389    parent_First = new int [maxN];\r
390    parent_Last = new int [maxN];\r
391 \r
392    child_First = new int [maxN];\r
393    child_Last = new int [maxN];\r
394 \r
395    cblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));\r
396    pblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE))); \r
397   \r
398    child_flags = new bool [maxN];\r
399    parent_flags = new bool [maxN];\r
400    \r
401    \r
402    noChange = true;\r
403         \r
404    nPOP = (int)(popProfile[0].popsize.size());\r
405    \r
406    N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
407    \r
408    currentProfile = 0;\r
409         \r
410    nextFree = 0;\r
411    \r
412    for (int i = 0; i < N; i++)\r
413       child_flags[i] = 0;\r
414      \r
415    int count=0;\r
416    for(int k=0;k<nPOP;k++){    \r
417            for (int i = 0; i < nSample[k]; i++)\r
418               {\r
419                   child_First[popProfile[0].popStart[k]+i]=0;\r
420                   child_Last[popProfile[0].popStart[k]+i]=numBLOCKs-1;\r
421 \r
422               child_flags[popProfile[0].popStart[k]+i] = 1;\r
423 \r
424               for (int j = 0; j < numBLOCKs; j++)\r
425                         children[popProfile[0].popStart[k]+i][j] = cblocks + (count * L + j * BLOCKSIZE);\r
426                 \r
427               for (int j = 0; j < L; j++)\r
428                  {\r
429                  genepool[nextFree] = 1;\r
430                  genetimes[nextFree] = 0;\r
431                  children[popProfile[0].popStart[k]+i][j / BLOCKSIZE][j % BLOCKSIZE] = nextFree++;\r
432                  }\r
433                  \r
434                  count++;\r
435               }\r
436    }\r
437 \r
438    for (int i = 0; i < L; i++)\r
439       fragments[i] = n;\r
440 \r
441    for (int i = 0; i < N; i++)\r
442       parent_flags[i] = 0;\r
443 \r
444    activeSegments = L;\r
445    \r
446    //for (int i = 0; i < N; i++)\r
447    //  for (int j = 0; j < numBLOCKs; j++)\r
448    //     parents[i][j] = NULL;\r
449       \r
450    pblockcounter = 0;\r
451  \r
452 }\r
453 \r
454 void InitializeMutation(vector< vector<bool> > &return_chromosome){\r
455         \r
456 //         return_chromosome.clear();\r
457 //    return_chromosome.resize(n);\r
458 //    for(int i=0;i<n;i++){\r
459 //         return_chromosome[i].reserve(max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP)));\r
460 //      }\r
461 //    \r
462 //    chromosome.resize(n);\r
463 //    for(int i=0;i<n;i++){\r
464 //              chromosome[i].resize(L);\r
465 //              for(int j=0;j<L;j++){\r
466 //                 chromosome[i][j].reserve(max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP)));\r
467 //              }\r
468 //    }\r
469         \r
470 //    cout<<"WGCS-- chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
471    \r
472    for(int i=0;i<n;i++){\r
473            for(int j=0;j<L;j++){\r
474                    chromosome[i][j].clear();\r
475            }\r
476    }\r
477    \r
478 //    cout<<"WGCS-- after clear() chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
479    \r
480    branchMutation = new int [nextFree];\r
481    for(int i=0;i<nextFree;i++)branchMutation[i]=0;\r
482 \r
483 }\r
484 \r
485 void PrepareMutation(){\r
486         \r
487    for(int i=0;i<L;i++)fragmentLength[i]=0;\r
488    \r
489    for(int i=0;i<nextFree;i++){\r
490            geneIndex[i] = -1; \r
491            branchFragment[i] = -1;\r
492    }    \r
493         \r
494 }\r
495    \r
496 int SampleParent(int individual, int previous){\r
497         \r
498         if(fixedPOP){      \r
499            int population = individual/popProfile[0].popsize[0]; \r
500            \r
501            if (previous == -1 && nPOP>1){\r
502               if (globalRandom.Next() < migration){\r
503                       int random = globalRandom.NextInt()%(nPOP-1); \r
504                   if(population<=random)population=random+1;\r
505                   else population=random;\r
506               }\r
507            }\r
508               \r
509            return globalRandom.NextInt() % (popProfile[0].popsize[0]) + popProfile[0].popStart[population];\r
510    }\r
511    else{\r
512            \r
513            if(noChange){\r
514                    int population=0; \r
515            \r
516                    for(int i=0;i<nPOP;i++){\r
517                            if(individual>=popProfile[currentProfile].popStart[i])population=i;\r
518                            else break;\r
519                    }\r
520                    \r
521 //                 cout<<"No change: ind="<<individual<<" pop="<<population<<endl;\r
522                    \r
523                    if (previous == -1 && nPOP>1){\r
524                       if (globalRandom.Next() < migration){\r
525                               int random = globalRandom.NextInt()%(nPOP-1); \r
526                           if(population<=random)population=random+1;\r
527                           else population=random;\r
528                       }\r
529                    }\r
530                    \r
531 //                 cout<<"new pop="<<population<<endl;\r
532                       \r
533                    return globalRandom.NextInt() % (popProfile[currentProfile].popsize[population]) + popProfile[currentProfile].popStart[population];\r
534            }\r
535            else{\r
536                    int population=0; \r
537            \r
538                    for(int i=0;i<nPOP;i++){\r
539                            if(individual>=popProfile[currentProfile].popStart[i])population=i; \r
540                            else break;\r
541                    }\r
542 //                 cout<<"Change: ind="<<individual<<" pop="<<population<<endl;\r
543                    \r
544                    if (previous == -1 && nPOP>1){\r
545                       if (globalRandom.Next() < migration){\r
546                               int random = globalRandom.NextInt()%(nPOP-1); \r
547                           if(population<=random)population=random+1;\r
548                           else population=random;\r
549                       }\r
550                    }\r
551                    \r
552 //                 cout<<"new pop="<<population<<endl;\r
553                    \r
554                    if(popProfile[currentProfile].popChange[population].size()==1){\r
555 //                         cout<<"To next pop="<<popProfile[currentProfile].popChange[population][0]<<endl;\r
556                            \r
557                                 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][0]]) \r
558                                                 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][0]];      \r
559                    }\r
560                    else{\r
561                                 double selectRandom = globalRandom.Next();\r
562                                 for(unsigned int i=0;i<popProfile[currentProfile].popChange[population].size();i++){\r
563                                         if(selectRandom < popProfile[currentProfile].selectProb[population][i]){\r
564                                                 \r
565 //                                              cout<<"To next pop="<<popProfile[currentProfile].popChange[population][i]<<endl;\r
566                                                 \r
567                                                 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][i]]) \r
568                                                 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][i]];\r
569                                         }       \r
570                                 }\r
571                                 cout<<"Random number generator error!"<<endl;\r
572                                 exit(1);\r
573                    }\r
574                    \r
575                    \r
576            }\r
577            \r
578    }\r
579 }\r
580 \r
581 void readPopProfile(string filename, unsigned int numPopulation){\r
582            \r
583            int popProfile_index=0;\r
584            \r
585            popProfile.clear();\r
586            struct popProfilesStruct temp_popProfile;\r
587            popProfile.push_back(temp_popProfile);       \r
588                                    \r
589            ifstream popFile;\r
590            string fileline;\r
591            char *term;\r
592            char *term1;\r
593            \r
594            popFile.open(filename.c_str());\r
595            if(!popFile)error("Population profile: %s cannot be opened!",filename.c_str());\r
596            \r
597            getline(popFile,fileline);                                   // the first line is the population sizes at generation 0\r
598            \r
599            term = strtok ((char*)fileline.c_str()," \t");  // the first term is the generation\r
600            popProfile[popProfile_index].generation=atoi(term);\r
601            \r
602            if(popProfile[popProfile_index].generation!=0)error("The first line in population profile should be generation 0!");\r
603            \r
604            term = strtok (NULL, " \t");                                    // the second term and after are the sizes of that population\r
605            while(term!=NULL){\r
606                            popProfile[popProfile_index].popsize.push_back(atoi(term));\r
607                            term = strtok (NULL, " \t");\r
608            }\r
609 \r
610            popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
611            for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
612                    popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
613            }\r
614                            \r
615            maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
616            \r
617            if(popProfile[0].popsize.size()!=numPopulation)error("The number of populations specified by -pop is not consistent with generation 0 in population profile:%s",filename.c_str());\r
618            \r
619            // read in the population correspondence to the next population profile\r
620            \r
621                    \r
622            while(popFile.peek()!=EOF){\r
623                    getline(popFile,fileline);                                           // the even number lines are the correspondence of populations of different generation\r
624                    popProfile[popProfile_index].popChange.resize(popProfile[popProfile_index].popsize.size());\r
625                    \r
626                    term = strtok ((char*)fileline.c_str(),"- \t");  // the FROM population\r
627                    while(term!=NULL){\r
628                            \r
629                            term1 = strtok (NULL, "- \t");                               // the TO population\r
630 \r
631                            popProfile[popProfile_index].popChange[atoi(term)-1].push_back(atoi(term1)-1);\r
632                            \r
633                            term = strtok (NULL, "- \t");\r
634                    }\r
635                    \r
636                    popProfile_index++;\r
637                    popProfile.push_back(temp_popProfile);\r
638                    \r
639                    if(popFile.peek()==EOF)error("Error in population profile: the last line should specify the population sizes.");\r
640                    \r
641                    // read the next population profile\r
642                    \r
643                    getline(popFile,fileline);                                   // the population sizes\r
644    \r
645                    term = strtok ((char*)fileline.c_str()," \t");  // the first term is the generation\r
646                    popProfile[popProfile_index].generation=atoi(term);\r
647                    \r
648                    term = strtok (NULL, " \t");                                    // the second term and after are the sizes of that population\r
649                    while(term!=NULL){\r
650                                    popProfile[popProfile_index].popsize.push_back(atoi(term));\r
651                                    term = strtok (NULL, " \t");\r
652                    }\r
653                    \r
654                    popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
655                    for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
656                            popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
657                    }\r
658                    \r
659                    if(maxN < popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back())\r
660                                         maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
661            }    \r
662            \r
663            for(unsigned int i=0;i<popProfile.size();i++){\r
664                    popProfile[i].selectProb.resize(popProfile[i].popChange.size());\r
665                    for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
666                            float sum=0.0;\r
667                            for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)sum += popProfile[i+1].popsize[popProfile[i].popChange[j][k]];\r
668                            \r
669                            popProfile[i].selectProb[j].push_back(((float)popProfile[i+1].popsize[popProfile[i].popChange[j][0]])/sum);\r
670                            for(unsigned int k=1;k<popProfile[i].popChange[j].size();k++)\r
671                                         popProfile[i].selectProb[j].push_back(popProfile[i].selectProb[j].back()+popProfile[i+1].popsize[popProfile[i].popChange[j][k]]/sum);\r
672                    }\r
673            }\r
674            // check for consistence\r
675            \r
676            int min, max;\r
677            \r
678            for(unsigned int i=0;i<popProfile.size()-1;i++){\r
679                         min=99999;max=-1;\r
680                         for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
681                                 if(popProfile[i].popChange[j].size()==0)error("One population does not have its parent population: profile file line %d, population %d\n",i+1,j+1);     \r
682                                 for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++){\r
683                                         if(min>popProfile[i].popChange[j][k])min=popProfile[i].popChange[j][k];\r
684                                         if(max<popProfile[i].popChange[j][k])max=popProfile[i].popChange[j][k];\r
685                                 }\r
686                         }   \r
687                         if(min!=0 || max!=(int)(popProfile[i+1].popsize.size()-1))\r
688                                 error("The population changes does not consistent with the next population profile. line=%d, min=%d, max=%d, num POP in next line=%d\n",i+1,min+1,max+1,popProfile[i+1].popsize.size());\r
689            }\r
690         \r
691 }\r
692 \r
693 \r
694 void readRecombination(string filename, int pieces){\r
695 \r
696            ifstream recombFile;\r
697            string fileline;\r
698            char *term;\r
699                 \r
700            vector<float> recDistribution;\r
701            vector<float> recRate;\r
702            \r
703            float sum=0.0;\r
704            float random;\r
705            \r
706            recombFile.open(filename.c_str());\r
707            if(!recombFile)error("Recombination profile: %s cannot be opened!",filename.c_str());\r
708            \r
709            getline(recombFile,fileline);                // the first line is head of distribution table\r
710            term = strtok ((char*)fileline.c_str()," \t");\r
711            term = strtok (NULL, " \t"); \r
712            \r
713            if(strcmp(term,"Freq")==0 || strcmp(term,"freq")==0){\r
714                    while(recombFile.peek()!=EOF){\r
715                            getline(recombFile,fileline);\r
716                            \r
717                            term = strtok ((char*)fileline.c_str()," \t"); // the first number is the recombination rate\r
718                            recRate.push_back((float)(atof(term)));\r
719                            \r
720                            if(atof(term)<0 || atof(term)>1)error("Recombination rate should be between 0 and 1. input=%f",atof(term));\r
721                            \r
722                            term = strtok (NULL, " \t");                                   // the second number is the frequency of recombination rate\r
723                            recDistribution.push_back((float)(atof(term)));\r
724                            \r
725                            if(atof(term)<=0)error("Frequency should be positive. input=%f",atof(term));\r
726                            \r
727                            sum += (float)(atof(term));\r
728                    }\r
729                    \r
730                    if(recRate.size()!=recDistribution.size() || recRate.size()==0)error("Errors in the recombination profile: %s",filename.c_str());\r
731                    \r
732                    // normalize the frequency \r
733                    for(unsigned int i=0;i<recDistribution.size();i++) recDistribution[i] /= sum;\r
734                    \r
735                    // compute the cumulative distribution\r
736                    for(unsigned int i=1;i<recDistribution.size();i++) recDistribution[i] += recDistribution[i-1];\r
737                    \r
738                    recombVector = new float [pieces-1];\r
739                    \r
740                    for(int i=0;i<pieces-1;i++){\r
741                            random = (float)(globalRandom.Next());\r
742                            for(unsigned int j=0;j<recDistribution.size();j++){\r
743                                         if(random < recDistribution[j]){\r
744                                                 recombVector[i]=recRate[j];\r
745                                                 break;   \r
746                                         }\r
747                            }\r
748                    }\r
749                    \r
750                         //check the input\r
751                         cout<<"*** RECOMBINATION RATES PROFILE ***"<<endl;\r
752                         cout<<"Recombination_Rate\tCumulative_frequency"<<endl;\r
753                         for(unsigned int i=0;i<recRate.size();i++){\r
754                                 cout<<recRate[i]<<"\t\t\t"<<recDistribution[i]<<endl;  \r
755                         }               \r
756                            \r
757            }\r
758            else if(strcmp(term,"Pos")==0 || strcmp(term,"pos")==0){\r
759                    float currentRec=0.0,nextRec=0.0;\r
760                    int nextPos=-1;\r
761                    \r
762                    if(recombFile.peek()!=EOF){\r
763                            getline(recombFile,fileline);\r
764                            term = strtok ((char*)fileline.c_str()," \t");\r
765                            currentRec=(float)(atof(term));\r
766                            \r
767                            if(recombFile.peek()!=EOF){\r
768                                    getline(recombFile,fileline);\r
769                                    term = strtok ((char*)fileline.c_str()," \t");\r
770                                nextRec=(float)(atof(term));\r
771                                term = strtok (NULL, " \t");     \r
772                                nextPos=atoi(term);\r
773                            }\r
774                            else{\r
775                                    nextPos=-1;\r
776                            }\r
777                            \r
778                    }\r
779                    else{\r
780                            error("Recombination file does not contain recombination rate information.\n");\r
781                    }\r
782                    \r
783                    recombVector = new float [pieces-1];\r
784                    \r
785                    for(int i=0;i<pieces-1;i++){\r
786                            if(i+1==nextPos){\r
787                                     currentRec=nextRec;\r
788                                     if(recombFile.peek()!=EOF){\r
789                                                 getline(recombFile,fileline);\r
790                                                 term = strtok ((char*)fileline.c_str()," \t");\r
791                                         nextRec=(float)(atof(term));\r
792                                         term = strtok (NULL, " \t");    \r
793                                         nextPos=atoi(term);\r
794                                         }\r
795                                         else{\r
796                                                 nextPos=-1;\r
797                                         }\r
798                                 }\r
799                                 \r
800                                 recombVector[i]=currentRec;\r
801                    }\r
802                    \r
803                    \r
804                    \r
805            }\r
806            else{\r
807                    error("The second column is not \"freq\" or \"pos\".\n");\r
808            }\r
809            \r
810            \r
811 \r
812            \r
813            cout<<"The recombination rates between fragments are:"<<endl<<"\t";\r
814            for(int i=0;i<pieces-1;i++)cout<<"("<<i+1<<") "<<recombVector[i]<<" ";\r
815            cout<<"("<<pieces<<")"<<endl;\r
816            cout<<"*** END OF RECOMBINATION RATES PROFILE ***"<<endl;\r
817                 \r
818 }\r
819 \r
820 void drawtrees(){\r
821         \r
822         map<int,string > tree;\r
823         ostringstream  temp;\r
824         \r
825                            \r
826         for(int i=0;i<L;i++){\r
827                 tree.clear();\r
828                 \r
829                 for(int j=0;j<n;j++){\r
830                         if(tree[genepool[j*L+i]]!=""){\r
831                                 temp.str("");\r
832                                 temp <<tree[genepool[j*L+i]]<<","<<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
833                                 tree[genepool[j*L+i]]=  temp.str();\r
834                         }\r
835                         else{\r
836                                 temp.str("");\r
837                                 temp <<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
838                                 tree[genepool[j*L+i]]=  temp.str();\r
839                         }\r
840                 }\r
841                 \r
842         \r
843                 for(int j=n*L;j<nextFree;j++){\r
844                         if(tree[j]!=""){\r
845                                 if(genepool[j]==0){\r
846                                         cout<<"The tree for fragment "<<i+1<<"= ("<<tree[j]<<");"<<endl;\r
847                                 }\r
848                                 else{                                                   \r
849                                         if(tree[genepool[j]]!=""){\r
850                                                 temp.str("");\r
851                                                 temp <<tree[genepool[j]]<<",("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
852                                                 tree[genepool[j]]=      temp.str();\r
853                                         }\r
854                                         else{\r
855                                                 temp.str("");\r
856                                                 temp <<"("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
857                                                 tree[genepool[j]]=temp.str();\r
858                                         }\r
859                                 }\r
860                         }\r
861                 }\r
862         }\r
863         \r
864         tree.clear();\r
865 }\r
866 \r
867 \r
868 \r
869 \r
870 void genome(string popSize,                                             // effective population size of each population\r
871                   int nSubPOP,                                                  // number of populations\r
872                   vector<int> & nSubSample,                     // number of samples (haplotypes) draw from each populations\r
873                   int numPieces,                                                // number of fragments for each sample (chromosome)\r
874                   int pieceLen,                                                 // length in base pair of each fragment\r
875                   int numIndepRegion,                                   // number of independent regions (independent chromosome)\r
876                   int s,                                                                // fixed number of SNPs want to simulate, randomly place s SNPs on the genealogy, -1 means the number of mutation follows a Poisson distribution \r
877                   string rec,                                                   // recombination rate between consecutive fragments per generation\r
878                   double mut,                                                   // mutation rate per generation per base pair\r
879                   double mig,                                                   // migration rate per generation\r
880                   vector< vector<bool> > &return_chromosome, // the return chromosome by connecting all independent chromosome into one long chromosome\r
881                   long t,                                                               // random seed \r
882                   int drawtree,                                         // drawtree=1 output the genealogy trees for each fragment and chromosome; drawtree=0 do not output the trees   \r
883                   bool printparameters                                  // =1 print out all input parameters, =0 do not print input parameters\r
884                   )\r
885 {   \r
886    if(atoi(popSize.c_str())>0){                 // if the population size is fixed during evoluation\r
887            N = atoi(popSize.c_str())*nSubPOP;\r
888            maxN = N;\r
889            \r
890            fixedPOP = true;\r
891            \r
892            popProfile.clear();\r
893            struct popProfilesStruct temp_popProfile;\r
894            popProfile.push_back(temp_popProfile);\r
895            \r
896            popProfile[0].generation = 0;\r
897            \r
898            for(int i=0;i<nSubPOP;i++)popProfile[0].popsize.push_back(atoi(popSize.c_str()));\r
899            for(int i=0;i<nSubPOP;i++)popProfile[0].popStart.push_back(i*atoi(popSize.c_str()));\r
900            \r
901            cout<<"*** POPULATION PROFILE ***"<<endl;\r
902            cout<<"The size of each population is fixed to "<<atoi(popSize.c_str())<<endl;\r
903            cout<<"Sizes of populations = ";\r
904            for(unsigned int j=0;j<popProfile[0].popsize.size();j++)cout<<popProfile[0].popsize[j]<<" ";\r
905            cout<<endl;\r
906 //         cout<<"POP Start index = ";\r
907 //         for(int j=0;j<popProfile[0].popStart.size();j++)cout<<popProfile[0].popStart[j]<<" ";\r
908 //         cout<<endl;\r
909            cout<<"*** END OF POPULATION PROFILE ***"<<endl<<endl;\r
910                         \r
911    }\r
912    else{                                                                        // popSize stores the filename of the population profile during evoluation\r
913            fixedPOP = false;      \r
914         \r
915            currentProfile = 0;\r
916            \r
917            readPopProfile(popSize,nSubPOP);\r
918            \r
919            N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
920            \r
921            // print out the population profile information.\r
922            \r
923            cout<<"*** POPULATION PROFILE ***"<<endl;\r
924            cout<<"Maximum total size of populations at all generations: N = "<<maxN<<endl;\r
925            cout<<"Total size of populations at generation 0: N = "<<N<<endl<<endl;\r
926            \r
927            for(unsigned int i=0;i<popProfile.size();i++){\r
928                         cout<<"Generation = "<<popProfile[i].generation<<endl;\r
929                         cout<<"Sizes of populations= ";\r
930                         for(unsigned int j=0;j<popProfile[i].popsize.size();j++)cout<<popProfile[i].popsize[j]<<" ";\r
931                         cout<<endl;\r
932 //                      cout<<"POP Start index = ";\r
933 //                      for(int j=0;j<popProfile[i].popStart.size();j++)cout<<popProfile[i].popStart[j]<<" ";\r
934 //                      cout<<endl;\r
935                         if(popProfile[i].popChange.size()>0){\r
936                                 cout<<"Populations change backward in time:"<<endl;\r
937                                 for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
938                                         cout<<" From "<<j+1<<" to ";\r
939                                         for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)cout<<popProfile[i].popChange[j][k]+1<<" ";\r
940                                         cout<<endl;\r
941                                 }\r
942                                 \r
943 //                              cout<<"Selection cumulative probability:"<<endl;   \r
944 //                              for(int j=0;j<popProfile[i].selectProb.size();j++){\r
945 //                                      cout<<" From "<<j+1<<" with prob = ";\r
946 //                                      for(int k=0;k<popProfile[i].selectProb[j].size();k++)cout<<popProfile[i].selectProb[j][k]<<" ";\r
947 //                                      cout<<endl;\r
948 //                              }\r
949                         }\r
950                         if(i==popProfile.size()-1)cout<<"*** END OF POPULATION PROFILE ***"<<endl;\r
951                         cout<<endl;\r
952            }\r
953    }\r
954    \r
955    n=0;\r
956    nPOP=nSubPOP;\r
957    nSample.resize(nPOP);\r
958    for(int i=0;i<nPOP;i++){\r
959            if(popProfile[0].popsize[i]<nSubSample[i])error("Population size should be larger than sample size!");\r
960            n += nSubSample[i];\r
961            nSample[i]=nSubSample[i];\r
962    }\r
963    \r
964    L = numPieces;\r
965    length = pieceLen;\r
966    numChr = numIndepRegion;\r
967    SNP=s;\r
968    \r
969    if(atof(rec.c_str())>0){                             // if the recombination rate is fixed\r
970            recombination = atof(rec.c_str());\r
971    }\r
972    else{                                                // the distribution of recombination rate is described in the file (filename stored in rec)\r
973            recombination = -1;\r
974            readRecombination(rec,L);\r
975    }\r
976    \r
977    \r
978    \r
979    mutation = mut;\r
980    migration = mig;\r
981    \r
982    if(t<=0)t=(long)(time(0)); // if t >0 we set the seed to t\r
983    cout<<endl<<"random seed="<<t<<endl;\r
984    globalRandom.Reset(t);       \r
985    \r
986    if(printparameters){\r
987            printf("popSize=%s, n=%d, nPOP=%d, L=%d, length=%d, numChr=%d, SNP=%d, rec=%s, mut=%.4e, mig=%.4e, drawtree=%d, printparameters=%d\n",popSize.c_str(),n,nPOP,L,length,numChr,SNP,rec.c_str(),mut,mig,drawtree,printparameters);\r
988            cout<<"subSample = ";\r
989            for(int i=0;i<nPOP;i++){\r
990                    cout<<nSample[i]<<" ";\r
991            }\r
992            cout<<endl;\r
993    }\r
994    \r
995    if(N<=0 || n<=0 || L<=0 || length <=0 || numChr <=0 || atof(rec.c_str()) <0 || mut <0 || mig<0){\r
996            cout<<"Input parameters have to be positive."<<endl;\r
997            printf("N=%d, n=%d, L=%d, length=%d, numChr=%d, recombination=%s, mutation=%lf, migration=%lf\n",N,n,L,length,numChr,rec.c_str(),mut,mig);\r
998            exit(0);\r
999    }\r
1000 \r
1001    poolsize = 2 * n * L - L;\r
1002 \r
1003    AllocateMemory(return_chromosome);\r
1004 \r
1005 //    cout<<"memory OK!"<<endl<<endl;\r
1006    \r
1007         for(int chr=0;chr<numChr;chr++){\r
1008                 \r
1009            Initialize();\r
1010            \r
1011 //         cout<<"Initialization OK!"<<endl;\r
1012         \r
1013            bool done = false;\r
1014            int  generation = 0, units = n * L;\r
1015            \r
1016            while (!done)\r
1017               {\r
1018         //       printf("\n\nGeneration %d -- Tracking %d units of sequence [pool used %.2f%%]   %c",\r
1019         //              generation++, units, nextFree * 100. / poolsize,\r
1020         //              !debug ? '\r' : '\n');\r
1021         \r
1022                   generation++;\r
1023                   \r
1024                   if(!fixedPOP && currentProfile<popProfile.size()-1)\r
1025                                 if(generation == popProfile[currentProfile+1].generation)noChange=false;\r
1026                   \r
1027               //if (debug)\r
1028               //   {\r
1029               //   printf("F  : ");\r
1030               //   for (int i = 0; i < L; i++)\r
1031               //      printf("%3d ", fragments[i]);\r
1032               //   printf("\n");\r
1033               //   }\r
1034         \r
1035               // Position of the first segment allocated to the current generation\r
1036               int generationStart = nextFree;\r
1037         \r
1038               // For each individual in the population\r
1039               for (int i = 0; i < N && !done; i++)\r
1040                  if (child_flags[i])\r
1041                     {\r
1042                             \r
1043                     /*if (debug) printf("%2d : ", i);*/\r
1044         \r
1045                     // The position we are currently tracking and its distance\r
1046                     // from the previous position\r
1047                     int distance = 0, parent = -1;\r
1048                     int startPiece = 0;\r
1049                     float recombProb;\r
1050            \r
1051                             for (int block = child_First[i]; block <= child_Last[i]; block++)\r
1052                 if (children[i][block] == NULL)\r
1053                    distance += BLOCKSIZE;\r
1054                 else\r
1055                    {\r
1056                    int pos = BLOCKSIZE * block;\r
1057                    int pos_in_block;\r
1058                                    int pos_bound = (L < BLOCKSIZE * (block + 1) ? L : BLOCKSIZE * (block + 1) );\r
1059 \r
1060                    while (pos <  pos_bound )\r
1061                        {\r
1062                                \r
1063                            pos_in_block = pos % BLOCKSIZE;\r
1064                        // Skip through uninformative positions along each chromosome\r
1065                        if (children[i][block][pos_in_block] == -1)\r
1066                           {\r
1067                           //if (debug) printf("  . \n");\r
1068                           distance++;\r
1069                           pos++;\r
1070                           continue;\r
1071                           }\r
1072                          \r
1073                        // Pick an ancestor at random for the current piece of chromosome\r
1074                        \r
1075                        if(recombination>0){                     // if the recombination rate is fixed over the genome\r
1076                                recombProb = (float)(pow(1.0 - recombination, distance));\r
1077                        }\r
1078                        else{                                            // if the recombination rate is specified by the profile\r
1079                                recombProb = 1;\r
1080                                for(int recV_iter=startPiece;recV_iter<pos;recV_iter++)recombProb *= (1-recombVector[recV_iter]);\r
1081                        }\r
1082                        \r
1083                     \r
1084                        if (parent == -1 ||\r
1085                            (distance > 0 && globalRandom.Next() > recombProb)){\r
1086                                     parent = SampleParent(i, parent);\r
1087                         }\r
1088                             \r
1089                        \r
1090                        TouchParent(parent, pos);\r
1091                        \r
1092                        \r
1093                        // Reset distance between segments\r
1094                        distance = 1;\r
1095                        startPiece=pos;\r
1096 \r
1097                        //if (debug) printf("parent=%3d pos=%d address=%d address2=%d\n", parent,pos,parents[parent][block],parents[parent][pos/BLOCKSIZE]);\r
1098                        \r
1099                        // Check if we sampled a new ancestral segment ...\r
1100                        if (parents[parent][block][pos_in_block] == -1)\r
1101                           {\r
1102                           // We delay sampling a new gene from the pool until we know\r
1103                           // a coalescent event occured ...\r
1104                           parents[parent][block][pos_in_block] = children[i][block][pos_in_block];\r
1105                           \r
1106                           }\r
1107                        // Or if a coalescent event occured\r
1108                        else\r
1109                           {\r
1110                           // If we previously delayed sampling this parent\r
1111                           if (parents[parent][block][pos_in_block] < generationStart)\r
1112                             {\r
1113                                     genetimes[nextFree] = generation;\r
1114                                     genepool[parents[parent][block][pos_in_block]] = nextFree;\r
1115                                     parents[parent][block][pos_in_block] = nextFree++;\r
1116 \r
1117                                     if (nextFree == poolsize && units != 2)\r
1118                                        error("Genepool exhausted\n");\r
1119                             }\r
1120 \r
1121                           genepool[children[i][block][pos_in_block]] = parents[parent][block][pos_in_block];\r
1122                           \r
1123                           fragments[pos]--;\r
1124                           units--;\r
1125 \r
1126                          if (fragments[pos] == 1)\r
1127                             {\r
1128                             // Reached the MRCA for this fragment\r
1129                             MRCAtimes[pos] = generation;\r
1130                             MRCA[pos] = parents[parent][block][pos_in_block];\r
1131                             \r
1132                             genepool[parents[parent][block][pos_in_block]]=0;\r
1133                             \r
1134                             parents[parent][block][pos_in_block] = -1;\r
1135                             units--;\r
1136          \r
1137                             // One less segment to track\r
1138                             if (--activeSegments == 0)\r
1139                                {\r
1140                                done = true;\r
1141                                break;\r
1142                                }\r
1143                             }\r
1144                          }\r
1145                       pos++;\r
1146                       }\r
1147                    }\r
1148                     }\r
1149                     \r
1150               NewGeneration();\r
1151             \r
1152               }\r
1153         \r
1154            if(drawtree){\r
1155                    cout<<endl<<"Genealogy trees for chromosome:"<<chr+1<<endl;\r
1156                    drawtrees();\r
1157            }  \r
1158               \r
1159            // assign mutations\r
1160 \r
1161            cout<<"Simulate mutation for Chromosome "<<chr+1<<endl;\r
1162            \r
1163            FreeCoalescentMemory();\r
1164            \r
1165            cout<<"Coalescent Memory free"<<endl;\r
1166            \r
1167            InitializeMutation(return_chromosome);\r
1168 \r
1169            cout<<"Mutation process initialized"<<endl;\r
1170                 \r
1171 //          cout<<"genepool:\t";\r
1172 //              for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
1173 //              cout<<endl;\r
1174 //              \r
1175 //              cout<<"genetimes:\t";\r
1176 //              for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
1177 //              cout<<endl;\r
1178 //         \r
1179 //              cout<<"nextFree="<<nextFree<<endl;\r
1180                 \r
1181 //              cout<<"mutation:\t";\r
1182 //              for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
1183 //              cout<<endl;\r
1184                 \r
1185            if(SNP<=0){\r
1186                    for(int i=0;i<nextFree;i++){\r
1187                                 if(genepool[i]!=0)branchMutation[i]=poissonInt((double)length*(double)(genetimes[genepool[i]]-genetimes[i])*mutation);   \r
1188                    }\r
1189            }\r
1190            else{\r
1191                    unsigned long * cumulativeCount = new unsigned long [nextFree];\r
1192                    \r
1193                    if(genepool[0]!=0)cumulativeCount[0] = genetimes[genepool[0]]-genetimes[0];\r
1194                    else cumulativeCount[0] = 0;\r
1195                    \r
1196                    for(int i=1;i<nextFree;i++){\r
1197                                 if(genepool[i]!=0)cumulativeCount[i] = cumulativeCount[i-1] + genetimes[genepool[i]]-genetimes[i]; \r
1198                                 else cumulativeCount[i] = cumulativeCount[i-1];  \r
1199                    }\r
1200                    \r
1201                    unsigned long totalGeneration = cumulativeCount[nextFree - 1];\r
1202                    \r
1203 //                 cout<<"Total Generation = "<<totalGeneration<<endl;\r
1204 //                 \r
1205 //                 if((unsigned long)SNP>totalGeneration){\r
1206 //                              cout<<"Required number of SNPs("<<SNP<<") is more than total number of generations("<<totalGeneration<<")!"<<endl;   \r
1207 //                 }\r
1208                    \r
1209                    for(int i=0;i<SNP;i++){\r
1210                            \r
1211                            unsigned long randomLong = globalRandom.NextInt()%totalGeneration;\r
1212                            \r
1213                            int begin = 0;\r
1214                            int end = nextFree - 1;\r
1215                            int middle = (begin+end)/2;\r
1216                            \r
1217                            while(1){\r
1218                                    if(cumulativeCount[middle]<randomLong){\r
1219                                                 begin = middle;\r
1220                                    }\r
1221                                    else{\r
1222                                                 end = middle;\r
1223                                    }\r
1224                                    \r
1225                                    if(end == begin+1)break;\r
1226                                    \r
1227                                    middle = (begin+end)/2;\r
1228                            }\r
1229                            \r
1230                            if(cumulativeCount[end] < randomLong || (cumulativeCount[begin] >= randomLong && begin != 0)){\r
1231                                         cout<<"searching error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;   \r
1232                                         error("see above message!\n");\r
1233                            }\r
1234                            \r
1235                            int sampleIndex;\r
1236                            \r
1237                            if(cumulativeCount[begin]>=randomLong)sampleIndex = begin;\r
1238                            else sampleIndex = end;\r
1239                            \r
1240                            if(genepool[sampleIndex]==0){\r
1241                                    cout<<"genepool error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;   \r
1242                                    error("see above message!\n");\r
1243                            }\r
1244                            \r
1245                            branchMutation[sampleIndex]++;\r
1246                    }\r
1247                    \r
1248                    delete [] cumulativeCount;\r
1249                    \r
1250            }\r
1251    \r
1252            delete [] genetimes;\r
1253            \r
1254            cout<<"Mutation assigned"<<endl;\r
1255             \r
1256 //      cout<<"mutation:\t";\r
1257 //              for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
1258 //              cout<<endl;\r
1259 \r
1260            AllocateMutationMemory();\r
1261            \r
1262            cout<<"Mutation Memory allocated"<<endl;\r
1263                 \r
1264            PrepareMutation();\r
1265         \r
1266            for(int i=0;i<n;i++){\r
1267                         for(int j=0;j<L;j++){\r
1268                                 if(branchMutation[i*L+j]>0){\r
1269                                         geneIndex[i*L+j] = fragmentLength[j];\r
1270                                         fragmentLength[j] += branchMutation[i*L+j];\r
1271                                 }\r
1272                                 branchFragment[i*L+j] = j;\r
1273                         }\r
1274            }   \r
1275               \r
1276            \r
1277            for(int i=0;i<nextFree;i++){\r
1278                         if(genepool[genepool[i]]!=0 && genepool[i]!=0){\r
1279                            if(branchMutation[genepool[i]]>0 && branchFragment[genepool[i]]== -1 ){\r
1280                                    geneIndex[genepool[i]] = fragmentLength[branchFragment[i]];\r
1281                                    fragmentLength[branchFragment[i]] += branchMutation[genepool[i]];\r
1282                            }\r
1283                            branchFragment[genepool[i]]=branchFragment[i];\r
1284                         }\r
1285            }\r
1286            \r
1287            //cout<<"branchfragment done"<<endl;\r
1288            \r
1289 //      cout<<"branchFragment:\t";\r
1290 //              for(int i=0;i<nextFree;i++)cout<<branchFragment[i]<<" ";\r
1291 //              cout<<endl;\r
1292 \r
1293 //      cout<<"geneIndex:\t";\r
1294 //              for(int i=0;i<nextFree;i++)cout<<geneIndex[i]<<" ";\r
1295 //              cout<<endl;\r
1296 //      \r
1297 //              cout<<"fragmentLength:\t";\r
1298 //              for(int i=0;i<L;i++)cout<<fragmentLength[i]<<" ";\r
1299 //              cout<<endl;\r
1300                 \r
1301 \r
1302 //          cout<<"genepool genetime mutation frag index\n";\r
1303 //              for(int i=0;i<nextFree;i++)cout<<genepool[i]<<" "<<genetimes[i]<<" "<<branchMutation[i]<<" "<<branchFragment[i]<<" "<<geneIndex[i]<<endl;\r
1304 //              cout<<endl;\r
1305            \r
1306 \r
1307    \r
1308        delete [] branchFragment;\r
1309        \r
1310            cout<<"Internal memory free"<<endl;\r
1311            \r
1312            // print the fragment index for each SNP\r
1313            cout<<"SNP genetic position scaled to [0,1]:"<<endl;\r
1314            if(L>1){\r
1315                    for(int i=0;i<L;i++){\r
1316                                 for(int j=0;j<fragmentLength[i];j++)cout<<(float)i/(float)(L-1)<<" ";   \r
1317                    }\r
1318            }\r
1319            else{\r
1320                    cout<<"Only one fragment to be simulated. No recombination between SNPs.";\r
1321            }\r
1322            cout<<endl;\r
1323                    \r
1324            int totalLength=0;\r
1325            if(SNP <=0){ \r
1326                    for(int i=0;i<L;i++)totalLength += fragmentLength[i];\r
1327            }\r
1328            \r
1329            \r
1330            for(int i=0;i<n;i++){\r
1331                         for(int j=0;j<L;j++){\r
1332                                 chromosome[i][j].resize(fragmentLength[j],0);\r
1333 \r
1334                                 for(int k=0;k<branchMutation[i*L+j];k++)chromosome[i][j][geneIndex[i*L+j]+k] = 1;\r
1335                                 \r
1336                                 int current=i*L+j;\r
1337                 \r
1338                                 while(genepool[genepool[current]]!=0){\r
1339                                         for(int k=0;k<branchMutation[genepool[current]];k++)chromosome[i][j][geneIndex[genepool[current]]+k]=1;\r
1340                                         current=genepool[current];\r
1341                                 }\r
1342                         }\r
1343            }\r
1344            \r
1345            cout<<"Chromosome done"<<endl;\r
1346            \r
1347            FreeMutationMemory();\r
1348            \r
1349            if(SNP <=0){ \r
1350                    for(int i=0;i<n;i++){\r
1351                                 return_chromosome[i].reserve(return_chromosome[i].size()+totalLength);\r
1352                    }\r
1353            }\r
1354            \r
1355            for(int i=0;i<n;i++){\r
1356                         for(int j=0;j<L;j++){\r
1357                                 return_chromosome[i].insert(return_chromosome[i].end(),chromosome[i][j].begin(),chromosome[i][j].end());\r
1358                         }\r
1359            }     \r
1360            \r
1361            cout<<"Return chromosome done"<<endl;\r
1362            \r
1363            // print the chromosomes\r
1364 //         for(int i=0;i<n;i++){\r
1365 //                      printf("Chr %d: ",i);\r
1366 //                      for(int j=0;j<L;j++){\r
1367 //                              for(int k=0;k<chromosome[i][j].size();k++)cout<<chromosome[i][j][k];\r
1368 //                              cout<<" ";      \r
1369 //                      }\r
1370 //                      cout<<endl;\r
1371 //         }       \r
1372 //         \r
1373 //         \r
1374 //        for(int i=0;i<n;i++){\r
1375 //                      printf("return Chr %d: ",i);\r
1376 //                      for(int j=0;j<return_chromosome[i].size();j++){\r
1377 //                              cout<<return_chromosome[i][j];\r
1378 //                      }\r
1379 //                      cout<<endl;\r
1380 //         }\r
1381 \r
1382            \r
1383    \r
1384    }\r
1385    \r
1386    FreeMemory();\r
1387    \r
1388    printf("\nDone simulating ARG ...\n");\r
1389    \r
1390    \r
1391    \r
1392 }\r
1393 \r
1394 \r