]> git.donarmstrong.com Git - genome.git/blob - genome.cpp
import initial version of genome
[genome.git] / genome.cpp
1 ////////////////////////////////////////////////////////////////////// \r
2 // genome.cpp\r
3 //////////////////////////////////////////////////////////////////////////////\r
4 //              COPYRIGHT NOTICE FOR GENOME CODE\r
5 //\r
6 // Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
7 // Version 0.2\r
8 // All rights reserved.\r
9 //\r
10 //   Redistribution and use in source and binary forms, with or without\r
11 //   modification, are permitted provided that the following conditions\r
12 //   are met:\r
13 //\r
14 //     1. Redistributions of source code must retain the above copyright\r
15 //        notice, this list of conditions and the following disclaimer.\r
16 //\r
17 //     2. Redistributions in binary form must reproduce the above copyright\r
18 //        notice, this list of conditions and the following disclaimer in the\r
19 //        documentation and/or other materials provided with the distribution.\r
20 //\r
21 //     3. The names of its contributors may not be used to endorse or promote\r
22 //        products derived from this software without specific prior written\r
23 //        permission.\r
24 //\r
25 //   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
26 //   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
27 //   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
28 //   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
29 //   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
30 //   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
31 //   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
32 //   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
33 //   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
34 //   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
35 //   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
36 //\r
37 ///////////////////////////////////////////////////////////////////////////////\r
38 \r
39 \r
40 #define _CRT_SECURE_NO_DEPRECATE \r
41 \r
42 #include "Random.h"\r
43 #include "Error.h"\r
44 #include "stochastic.h" \r
45 #include <iostream>\r
46 #include <fstream>\r
47 #include <math.h>\r
48 #include <vector> \r
49 #include <time.h>\r
50 #include <algorithm>\r
51 #include <map>\r
52 #include <sstream>\r
53 #include <stdio.h>\r
54 \r
55 using namespace std;\r
56 \r
57 //static bool debug = false;\r
58 static bool fixedPOP = true;\r
59 static bool noChange = true;\r
60 \r
61 \r
62 static int N = 10000, n = 100, L = 1, poolsize = 1024 * 1024 * 64, length=10000, numChr=1, SNP=-1; // length = number of base per fragment\r
63 static int maxN;\r
64 static unsigned int currentProfile;\r
65 static int nPOP=1;\r
66 static vector<int> nSample(nPOP,3);\r
67 static double recombination = 1e-8, migration = 0.0, mutation=1e-8;\r
68 \r
69 static int * genepool, * genetimes, * fragments, * MRCA, * MRCAtimes;\r
70 static int *** children, *** parents, * cblocks, * pblocks;\r
71 static int * parent_First, * parent_Last, * child_First, * child_Last;\r
72 static bool * child_flags, * parent_flags;\r
73 static int nextFree = 0, activeSegments = 0, pblockcounter = 0;\r
74 static int numBLOCKs;\r
75 static int BUFFER=1500;\r
76 static int INCREMENT=1000;\r
77 \r
78 #define BLOCKSIZE   128\r
79 #define SWAP(tmp,a,b)  tmp=a; a=b; b=tmp;\r
80 \r
81 static vector< vector< vector<bool> > > chromosome;\r
82 static int * fragmentLength;\r
83 static int * branchFragment;\r
84 static int * geneIndex;\r
85 static int * branchMutation;\r
86 \r
87 static float * recombVector=NULL;\r
88 \r
89 struct popProfilesStruct{\r
90         int generation;\r
91         vector<int> popsize;\r
92         vector<int> popStart;\r
93         vector< vector<float> > selectProb;\r
94         vector< vector<int> > popChange;\r
95 };\r
96 \r
97 static vector<struct popProfilesStruct> popProfile;\r
98 \r
99 void print(){\r
100         cout<<"\n*****************\nParents:\t";\r
101         for(int i=0;i<N;i++){\r
102                 if(parent_flags[i]){\r
103                         for(int j=0;j<L;j++) cout<<"["<<((parents[i][j / BLOCKSIZE]==NULL)?999:(parents[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
104                         cout<<"\t";     \r
105                 }\r
106         }       \r
107         cout<<endl;\r
108         \r
109         cout<<"Children:\t";\r
110         for(int i=0;i<N;i++){\r
111                 if(child_flags[i]){\r
112                         for(int j=0;j<L;j++) cout<<"["<<((children[i][j / BLOCKSIZE]==NULL)?999:(children[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
113                         cout<<"\t";     \r
114                 }\r
115         }       \r
116         cout<<endl;\r
117         \r
118         \r
119         \r
120 //      cout<<"parent_flag:\t";\r
121 //      for(int i=0;i<N;i++){\r
122 //              cout<<parent_flags[i]<<"\t";\r
123 //      }       \r
124 //      cout<<endl;\r
125 //      \r
126 //      cout<<"children_flag:\t";\r
127 //      for(int i=0;i<N;i++){\r
128 //              cout<<child_flags[i]<<"\t";\r
129 //      }       \r
130 //      cout<<endl;\r
131         \r
132         cout<<"genepool:\t";\r
133         for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
134         cout<<endl;\r
135         \r
136         cout<<"genetimes:\t";\r
137         for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
138         cout<<endl;\r
139         \r
140         cout<<"MCRA:\t";\r
141         for(int i=0;i<L;i++)cout<<MRCA[i]<<" ";\r
142         cout<<endl;\r
143         \r
144         cout<<"MCRAtime:\t";\r
145         for(int i=0;i<L;i++)cout<<MRCAtimes[i]<<" ";\r
146         cout<<endl;\r
147         \r
148         cout<<"fragments:\t";\r
149         for(int i=0;i<L;i++)cout<<fragments[i]<<" ";\r
150         cout<<endl;\r
151         \r
152         cout<<"NextFree="<<nextFree<<endl;\r
153 }\r
154 \r
155 int *** AllocateIntPtrMatrix(int rows, int columns){\r
156    int *** result = new int ** [rows];\r
157 \r
158    for (int i = 0; i < rows; i++)\r
159       result[i] = new int * [columns];\r
160   \r
161    return result;\r
162 }\r
163    \r
164 void FreeIntPtrMatrix(int *** & matrix, int rows){\r
165         \r
166    for (int i = 0; i < rows; i++)\r
167       delete [] matrix[i];\r
168 \r
169    delete [] matrix;\r
170 \r
171    matrix = NULL;\r
172 }   \r
173    \r
174 void NewGeneration(){\r
175         \r
176    int *** ptrmatrix, * vector;\r
177    bool * boolvector;\r
178    \r
179 //    for(int i=0;i<maxN;i++){\r
180 //      if(parent_flags[i])printf("numBlock=%d parent=%d first=%d last=%d\n",numBLOCKs,i,parent_First[i],parent_Last[i]);   \r
181 //    }\r
182    \r
183    SWAP(ptrmatrix, children, parents);\r
184    SWAP(vector, child_First, parent_First);\r
185    SWAP(vector, child_Last, parent_Last);\r
186    SWAP(boolvector, child_flags, parent_flags);\r
187    SWAP(vector, cblocks, pblocks);\r
188    \r
189    if(!noChange){\r
190            currentProfile++;\r
191            nPOP = (int)(popProfile[currentProfile].popsize.size());\r
192            N = popProfile[currentProfile].popStart.back()+popProfile[currentProfile].popsize.back();\r
193            noChange = true;\r
194    }\r
195 \r
196    for (int i = 0; i < maxN; i++)\r
197       parent_flags[i] = 0;\r
198       \r
199 //    for (int i = 0; i < maxN; i++)\r
200 //        for (int j = 0; j < numBLOCKs; j++)\r
201 //            parents[i][j] = NULL;\r
202       \r
203    pblockcounter = 0;\r
204 \r
205 }\r
206 \r
207    \r
208 void AllocateMemory(vector< vector<bool> > &return_chromosome){\r
209    \r
210    \r
211 //    if(SNP<=0){\r
212 //              cout<<"Reserve size for return_chromosome="<<max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP))<<endl;\r
213 //    }\r
214 //    else{\r
215 //              cout<<"Reserve size for return_chromosome="<<SNP*numChr<<endl;\r
216 //    }\r
217 //    cout<<"Reserve size for working chromosome="<<max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP))<<endl;\r
218    \r
219  \r
220    return_chromosome.clear();\r
221    return_chromosome.resize(n);\r
222 //    for(int i=0;i<n;i++){\r
223 //         if(SNP<=0){\r
224 //                      //return_chromosome[i].reserve(max(1,40*(int)(mutation*(double)numChr*(double)L*(double)length*(double)maxN/(double)nPOP)));\r
225 //         }\r
226 //         else{\r
227 //                      //return_chromosome[i].reserve(SNP*numChr);\r
228 //         }\r
229 //      }\r
230    \r
231    chromosome.resize(n);\r
232    for(int i=0;i<n;i++){\r
233                 chromosome[i].resize(L);\r
234 //              for(int j=0;j<L;j++){\r
235 //                 if(SNP<=0){\r
236 //                         //chromosome[i][j].reserve(max(1,40*(int)(mutation*(double)length*(double)maxN/(double)nPOP)));\r
237 //                 }\r
238 //                 else{\r
239 //                         //chromosome[i][j].reserve(SNP/L+1);\r
240 //                 }\r
241 //              }\r
242    }\r
243   \r
244    genepool = new int [poolsize];\r
245 \r
246    numBLOCKs= (int)ceil((double)L / (double)BLOCKSIZE);\r
247  \r
248    pblockcounter = 0;\r
249 \r
250 }\r
251 \r
252 \r
253 void AllocateMutationMemory(){\r
254         \r
255         fragmentLength = new int [L];\r
256         \r
257         branchFragment = new int [poolsize];\r
258         \r
259         geneIndex = new int [poolsize];   \r
260            \r
261 }\r
262    \r
263    \r
264    \r
265 void FreeMemory(){\r
266         \r
267    delete [] genepool;\r
268    \r
269    if(recombVector!=NULL)delete [] recombVector;\r
270    \r
271 }\r
272    \r
273   \r
274 void FreeMutationMemory(){\r
275         \r
276    delete [] fragmentLength;\r
277    delete [] geneIndex;\r
278    delete [] branchMutation;\r
279    \r
280         \r
281 }\r
282 \r
283 void FreeCoalescentMemory(){\r
284         \r
285    delete [] MRCA;\r
286    delete [] MRCAtimes;\r
287 \r
288    delete [] fragments;\r
289 \r
290    FreeIntPtrMatrix(children,maxN);\r
291    \r
292    FreeIntPtrMatrix(parents,maxN);\r
293 \r
294    delete [] child_flags;\r
295    delete [] parent_flags;\r
296 \r
297    delete [] cblocks;\r
298    delete [] pblocks;\r
299    \r
300    delete [] parent_First;\r
301    delete [] parent_Last;\r
302 \r
303    delete [] child_First;\r
304    delete [] child_Last;\r
305         \r
306 }\r
307 \r
308 void reallocBlock(){\r
309         \r
310         pblocks = (int *)realloc(pblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
311         cblocks = (int *)realloc(cblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
312         BUFFER += INCREMENT;    \r
313         \r
314 }\r
315 \r
316 void setnull(int parent, int position){\r
317         \r
318   if (parent_flags[parent] == 0){\r
319                 parent_First[parent]=position / BLOCKSIZE;\r
320                 parent_Last[parent]=position / BLOCKSIZE;       \r
321                 parents[parent][position / BLOCKSIZE] = NULL;  \r
322   }\r
323   else if(parent_First[parent]>position / BLOCKSIZE){\r
324           for(int i=position / BLOCKSIZE;i<parent_First[parent];i++){\r
325                   parents[parent][i] = NULL;\r
326           }\r
327           parent_First[parent]=position / BLOCKSIZE;\r
328   }\r
329   else if(parent_Last[parent]<position / BLOCKSIZE){\r
330           for(int i=position / BLOCKSIZE;i>parent_Last[parent];i--){\r
331                   parents[parent][i] = NULL;\r
332           }\r
333           parent_Last[parent]=position / BLOCKSIZE;\r
334   }\r
335   \r
336 }\r
337 \r
338 \r
339 \r
340 void TouchParent(int parent, int position){\r
341 \r
342   \r
343   setnull(parent,position);\r
344                 \r
345   if (parents[parent][position / BLOCKSIZE] != NULL)return;\r
346  \r
347 \r
348   if (pblockcounter >= n*numBLOCKs+BUFFER){\r
349             \r
350             int * pblocks_old=pblocks;\r
351             int * cblocks_old=cblocks;\r
352             \r
353             reallocBlock();\r
354             \r
355             for(int i=0;i<N;i++)\r
356               if(parent_flags[i])\r
357                 for(int j=parent_First[i];j<=parent_Last[i];j++)\r
358                         if(parents[i][j]!=NULL)parents[i][j] += pblocks - pblocks_old;\r
359             \r
360             for(int i=0;i<N;i++)\r
361               if(child_flags[i])\r
362                 for(int j=child_First[i];j<=child_Last[i];j++)\r
363                         if(children[i][j]!=NULL)children[i][j] += cblocks - cblocks_old;\r
364                 \r
365                 printf("pblock and cblock enlarged, BUFFER=%d.!\n",BUFFER);\r
366                 \r
367                 if(pblocks==NULL || cblocks==NULL)error("Blocks memory reallocation error.\n");\r
368   }\r
369 \r
370   parents[parent][position / BLOCKSIZE] = pblocks + BLOCKSIZE * pblockcounter++;\r
371   \r
372   for (int i = 0; i < BLOCKSIZE; i++)\r
373       parents[parent] [position / BLOCKSIZE] [i] = -1;\r
374       \r
375   parent_flags[parent] = 1;\r
376   \r
377 }\r
378 \r
379 void Initialize(){\r
380         \r
381    genetimes = new int [poolsize];\r
382            \r
383    MRCA = new int [L];\r
384    MRCAtimes = new int [L];\r
385 \r
386    fragments = new int [L];\r
387 \r
388    children = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
389    parents = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
390    \r
391    parent_First = new int [maxN];\r
392    parent_Last = new int [maxN];\r
393 \r
394    child_First = new int [maxN];\r
395    child_Last = new int [maxN];\r
396 \r
397    cblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));\r
398    pblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE))); \r
399   \r
400    child_flags = new bool [maxN];\r
401    parent_flags = new bool [maxN];\r
402    \r
403    \r
404    noChange = true;\r
405         \r
406    nPOP = (int)(popProfile[0].popsize.size());\r
407    \r
408    N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
409    \r
410    currentProfile = 0;\r
411         \r
412    nextFree = 0;\r
413    \r
414    for (int i = 0; i < N; i++)\r
415       child_flags[i] = 0;\r
416      \r
417    int count=0;\r
418    for(int k=0;k<nPOP;k++){    \r
419            for (int i = 0; i < nSample[k]; i++)\r
420               {\r
421                   child_First[popProfile[0].popStart[k]+i]=0;\r
422                   child_Last[popProfile[0].popStart[k]+i]=numBLOCKs-1;\r
423 \r
424               child_flags[popProfile[0].popStart[k]+i] = 1;\r
425 \r
426               for (int j = 0; j < numBLOCKs; j++)\r
427                         children[popProfile[0].popStart[k]+i][j] = cblocks + (count * L + j * BLOCKSIZE);\r
428                 \r
429               for (int j = 0; j < L; j++)\r
430                  {\r
431                  genepool[nextFree] = 1;\r
432                  genetimes[nextFree] = 0;\r
433                  children[popProfile[0].popStart[k]+i][j / BLOCKSIZE][j % BLOCKSIZE] = nextFree++;\r
434                  }\r
435                  \r
436                  count++;\r
437               }\r
438    }\r
439 \r
440    for (int i = 0; i < L; i++)\r
441       fragments[i] = n;\r
442 \r
443    for (int i = 0; i < N; i++)\r
444       parent_flags[i] = 0;\r
445 \r
446    activeSegments = L;\r
447    \r
448    //for (int i = 0; i < N; i++)\r
449    //  for (int j = 0; j < numBLOCKs; j++)\r
450    //     parents[i][j] = NULL;\r
451       \r
452    pblockcounter = 0;\r
453  \r
454 }\r
455 \r
456 void InitializeMutation(vector< vector<bool> > &return_chromosome){\r
457         \r
458 //         return_chromosome.clear();\r
459 //    return_chromosome.resize(n);\r
460 //    for(int i=0;i<n;i++){\r
461 //         return_chromosome[i].reserve(max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP)));\r
462 //      }\r
463 //    \r
464 //    chromosome.resize(n);\r
465 //    for(int i=0;i<n;i++){\r
466 //              chromosome[i].resize(L);\r
467 //              for(int j=0;j<L;j++){\r
468 //                 chromosome[i][j].reserve(max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP)));\r
469 //              }\r
470 //    }\r
471         \r
472 //    cout<<"WGCS-- chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
473    \r
474    for(int i=0;i<n;i++){\r
475            for(int j=0;j<L;j++){\r
476                    chromosome[i][j].clear();\r
477            }\r
478    }\r
479    \r
480 //    cout<<"WGCS-- after clear() chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
481    \r
482    branchMutation = new int [nextFree];\r
483    for(int i=0;i<nextFree;i++)branchMutation[i]=0;\r
484 \r
485 }\r
486 \r
487 void PrepareMutation(){\r
488         \r
489    for(int i=0;i<L;i++)fragmentLength[i]=0;\r
490    \r
491    for(int i=0;i<nextFree;i++){\r
492            geneIndex[i] = -1; \r
493            branchFragment[i] = -1;\r
494    }    \r
495         \r
496 }\r
497    \r
498 int SampleParent(int individual, int previous){\r
499         \r
500         if(fixedPOP){      \r
501            int population = individual/popProfile[0].popsize[0]; \r
502            \r
503            if (previous == -1 && nPOP>1){\r
504               if (globalRandom.Next() < migration){\r
505                       int random = globalRandom.NextInt()%(nPOP-1); \r
506                   if(population<=random)population=random+1;\r
507                   else population=random;\r
508               }\r
509            }\r
510               \r
511            return globalRandom.NextInt() % (popProfile[0].popsize[0]) + popProfile[0].popStart[population];\r
512    }\r
513    else{\r
514            \r
515            if(noChange){\r
516                    int population=0; \r
517            \r
518                    for(int i=0;i<nPOP;i++){\r
519                            if(individual>=popProfile[currentProfile].popStart[i])population=i;\r
520                            else break;\r
521                    }\r
522                    \r
523 //                 cout<<"No change: ind="<<individual<<" pop="<<population<<endl;\r
524                    \r
525                    if (previous == -1 && nPOP>1){\r
526                       if (globalRandom.Next() < migration){\r
527                               int random = globalRandom.NextInt()%(nPOP-1); \r
528                           if(population<=random)population=random+1;\r
529                           else population=random;\r
530                       }\r
531                    }\r
532                    \r
533 //                 cout<<"new pop="<<population<<endl;\r
534                       \r
535                    return globalRandom.NextInt() % (popProfile[currentProfile].popsize[population]) + popProfile[currentProfile].popStart[population];\r
536            }\r
537            else{\r
538                    int population=0; \r
539            \r
540                    for(int i=0;i<nPOP;i++){\r
541                            if(individual>=popProfile[currentProfile].popStart[i])population=i; \r
542                            else break;\r
543                    }\r
544 //                 cout<<"Change: ind="<<individual<<" pop="<<population<<endl;\r
545                    \r
546                    if (previous == -1 && nPOP>1){\r
547                       if (globalRandom.Next() < migration){\r
548                               int random = globalRandom.NextInt()%(nPOP-1); \r
549                           if(population<=random)population=random+1;\r
550                           else population=random;\r
551                       }\r
552                    }\r
553                    \r
554 //                 cout<<"new pop="<<population<<endl;\r
555                    \r
556                    if(popProfile[currentProfile].popChange[population].size()==1){\r
557 //                         cout<<"To next pop="<<popProfile[currentProfile].popChange[population][0]<<endl;\r
558                            \r
559                                 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][0]]) \r
560                                                 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][0]];      \r
561                    }\r
562                    else{\r
563                                 double selectRandom = globalRandom.Next();\r
564                                 for(unsigned int i=0;i<popProfile[currentProfile].popChange[population].size();i++){\r
565                                         if(selectRandom < popProfile[currentProfile].selectProb[population][i]){\r
566                                                 \r
567 //                                              cout<<"To next pop="<<popProfile[currentProfile].popChange[population][i]<<endl;\r
568                                                 \r
569                                                 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][i]]) \r
570                                                 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][i]];\r
571                                         }       \r
572                                 }\r
573                                 cout<<"Random number generator error!"<<endl;\r
574                                 exit(1);\r
575                    }\r
576                    \r
577                    \r
578            }\r
579            \r
580    }\r
581 }\r
582 \r
583 void readPopProfile(string filename, unsigned int numPopulation){\r
584            \r
585            int popProfile_index=0;\r
586            \r
587            popProfile.clear();\r
588            struct popProfilesStruct temp_popProfile;\r
589            popProfile.push_back(temp_popProfile);       \r
590                                    \r
591            ifstream popFile;\r
592            string fileline;\r
593            char *term;\r
594            char *term1;\r
595            \r
596            popFile.open(filename.c_str());\r
597            if(!popFile)error("Population profile: %s cannot be opened!",filename.c_str());\r
598            \r
599            getline(popFile,fileline);                                   // the first line is the population sizes at generation 0\r
600            \r
601            term = strtok ((char*)fileline.c_str()," \t");  // the first term is the generation\r
602            popProfile[popProfile_index].generation=atoi(term);\r
603            \r
604            if(popProfile[popProfile_index].generation!=0)error("The first line in population profile should be generation 0!");\r
605            \r
606            term = strtok (NULL, " \t");                                    // the second term and after are the sizes of that population\r
607            while(term!=NULL){\r
608                            popProfile[popProfile_index].popsize.push_back(atoi(term));\r
609                            term = strtok (NULL, " \t");\r
610            }\r
611 \r
612            popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
613            for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
614                    popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
615            }\r
616                            \r
617            maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
618            \r
619            if(popProfile[0].popsize.size()!=numPopulation)error("The number of populations specified by -pop is not consistent with generation 0 in population profile:%s",filename.c_str());\r
620            \r
621            // read in the population correspondence to the next population profile\r
622            \r
623                    \r
624            while(popFile.peek()!=EOF){\r
625                    getline(popFile,fileline);                                           // the even number lines are the correspondence of populations of different generation\r
626                    popProfile[popProfile_index].popChange.resize(popProfile[popProfile_index].popsize.size());\r
627                    \r
628                    term = strtok ((char*)fileline.c_str(),"- \t");  // the FROM population\r
629                    while(term!=NULL){\r
630                            \r
631                            term1 = strtok (NULL, "- \t");                               // the TO population\r
632 \r
633                            popProfile[popProfile_index].popChange[atoi(term)-1].push_back(atoi(term1)-1);\r
634                            \r
635                            term = strtok (NULL, "- \t");\r
636                    }\r
637                    \r
638                    popProfile_index++;\r
639                    popProfile.push_back(temp_popProfile);\r
640                    \r
641                    if(popFile.peek()==EOF)error("Error in population profile: the last line should specify the population sizes.");\r
642                    \r
643                    // read the next population profile\r
644                    \r
645                    getline(popFile,fileline);                                   // the population sizes\r
646    \r
647                    term = strtok ((char*)fileline.c_str()," \t");  // the first term is the generation\r
648                    popProfile[popProfile_index].generation=atoi(term);\r
649                    \r
650                    term = strtok (NULL, " \t");                                    // the second term and after are the sizes of that population\r
651                    while(term!=NULL){\r
652                                    popProfile[popProfile_index].popsize.push_back(atoi(term));\r
653                                    term = strtok (NULL, " \t");\r
654                    }\r
655                    \r
656                    popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
657                    for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
658                            popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
659                    }\r
660                    \r
661                    if(maxN < popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back())\r
662                                         maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
663            }    \r
664            \r
665            for(unsigned int i=0;i<popProfile.size();i++){\r
666                    popProfile[i].selectProb.resize(popProfile[i].popChange.size());\r
667                    for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
668                            float sum=0.0;\r
669                            for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)sum += popProfile[i+1].popsize[popProfile[i].popChange[j][k]];\r
670                            \r
671                            popProfile[i].selectProb[j].push_back(((float)popProfile[i+1].popsize[popProfile[i].popChange[j][0]])/sum);\r
672                            for(unsigned int k=1;k<popProfile[i].popChange[j].size();k++)\r
673                                         popProfile[i].selectProb[j].push_back(popProfile[i].selectProb[j].back()+popProfile[i+1].popsize[popProfile[i].popChange[j][k]]/sum);\r
674                    }\r
675            }\r
676            // check for consistence\r
677            \r
678            int min, max;\r
679            \r
680            for(unsigned int i=0;i<popProfile.size()-1;i++){\r
681                         min=99999;max=-1;\r
682                         for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
683                                 if(popProfile[i].popChange[j].size()==0)error("One population does not have its parent population: profile file line %d, population %d\n",i+1,j+1);     \r
684                                 for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++){\r
685                                         if(min>popProfile[i].popChange[j][k])min=popProfile[i].popChange[j][k];\r
686                                         if(max<popProfile[i].popChange[j][k])max=popProfile[i].popChange[j][k];\r
687                                 }\r
688                         }   \r
689                         if(min!=0 || max!=(int)(popProfile[i+1].popsize.size()-1))\r
690                                 error("The population changes does not consistent with the next population profile. line=%d, min=%d, max=%d, num POP in next line=%d\n",i+1,min+1,max+1,popProfile[i+1].popsize.size());\r
691            }\r
692         \r
693 }\r
694 \r
695 \r
696 void readRecombination(string filename, int pieces){\r
697 \r
698            ifstream recombFile;\r
699            string fileline;\r
700            char *term;\r
701                 \r
702            vector<float> recDistribution;\r
703            vector<float> recRate;\r
704            \r
705            float sum=0.0;\r
706            float random;\r
707            \r
708            recombFile.open(filename.c_str());\r
709            if(!recombFile)error("Recombination profile: %s cannot be opened!",filename.c_str());\r
710            \r
711            getline(recombFile,fileline);                // the first line is head of distribution table\r
712            term = strtok ((char*)fileline.c_str()," \t");\r
713            term = strtok (NULL, " \t"); \r
714            \r
715            if(strcmp(term,"Freq")==0 || strcmp(term,"freq")==0){\r
716                    while(recombFile.peek()!=EOF){\r
717                            getline(recombFile,fileline);\r
718                            \r
719                            term = strtok ((char*)fileline.c_str()," \t"); // the first number is the recombination rate\r
720                            recRate.push_back((float)(atof(term)));\r
721                            \r
722                            if(atof(term)<0 || atof(term)>1)error("Recombination rate should be between 0 and 1. input=%f",atof(term));\r
723                            \r
724                            term = strtok (NULL, " \t");                                   // the second number is the frequency of recombination rate\r
725                            recDistribution.push_back((float)(atof(term)));\r
726                            \r
727                            if(atof(term)<=0)error("Frequency should be positive. input=%f",atof(term));\r
728                            \r
729                            sum += (float)(atof(term));\r
730                    }\r
731                    \r
732                    if(recRate.size()!=recDistribution.size() || recRate.size()==0)error("Errors in the recombination profile: %s",filename.c_str());\r
733                    \r
734                    // normalize the frequency \r
735                    for(unsigned int i=0;i<recDistribution.size();i++) recDistribution[i] /= sum;\r
736                    \r
737                    // compute the cumulative distribution\r
738                    for(unsigned int i=1;i<recDistribution.size();i++) recDistribution[i] += recDistribution[i-1];\r
739                    \r
740                    recombVector = new float [pieces-1];\r
741                    \r
742                    for(int i=0;i<pieces-1;i++){\r
743                            random = (float)(globalRandom.Next());\r
744                            for(unsigned int j=0;j<recDistribution.size();j++){\r
745                                         if(random < recDistribution[j]){\r
746                                                 recombVector[i]=recRate[j];\r
747                                                 break;   \r
748                                         }\r
749                            }\r
750                    }\r
751                    \r
752                         //check the input\r
753                         cout<<"*** RECOMBINATION RATES PROFILE ***"<<endl;\r
754                         cout<<"Recombination_Rate\tCumulative_frequency"<<endl;\r
755                         for(unsigned int i=0;i<recRate.size();i++){\r
756                                 cout<<recRate[i]<<"\t\t\t"<<recDistribution[i]<<endl;  \r
757                         }               \r
758                            \r
759            }\r
760            else if(strcmp(term,"Pos")==0 || strcmp(term,"pos")==0){\r
761                    float currentRec=0.0,nextRec=0.0;\r
762                    int nextPos=-1;\r
763                    \r
764                    if(recombFile.peek()!=EOF){\r
765                            getline(recombFile,fileline);\r
766                            term = strtok ((char*)fileline.c_str()," \t");\r
767                            currentRec=(float)(atof(term));\r
768                            \r
769                            if(recombFile.peek()!=EOF){\r
770                                    getline(recombFile,fileline);\r
771                                    term = strtok ((char*)fileline.c_str()," \t");\r
772                                nextRec=(float)(atof(term));\r
773                                term = strtok (NULL, " \t");     \r
774                                nextPos=atoi(term);\r
775                            }\r
776                            else{\r
777                                    nextPos=-1;\r
778                            }\r
779                            \r
780                    }\r
781                    else{\r
782                            error("Recombination file does not contain recombination rate information.\n");\r
783                    }\r
784                    \r
785                    recombVector = new float [pieces-1];\r
786                    \r
787                    for(int i=0;i<pieces-1;i++){\r
788                            if(i+1==nextPos){\r
789                                     currentRec=nextRec;\r
790                                     if(recombFile.peek()!=EOF){\r
791                                                 getline(recombFile,fileline);\r
792                                                 term = strtok ((char*)fileline.c_str()," \t");\r
793                                         nextRec=(float)(atof(term));\r
794                                         term = strtok (NULL, " \t");    \r
795                                         nextPos=atoi(term);\r
796                                         }\r
797                                         else{\r
798                                                 nextPos=-1;\r
799                                         }\r
800                                 }\r
801                                 \r
802                                 recombVector[i]=currentRec;\r
803                    }\r
804                    \r
805                    \r
806                    \r
807            }\r
808            else{\r
809                    error("The second column is not \"freq\" or \"pos\".\n");\r
810            }\r
811            \r
812            \r
813 \r
814            \r
815            cout<<"The recombination rates between fragments are:"<<endl<<"\t";\r
816            for(int i=0;i<pieces-1;i++)cout<<"("<<i+1<<") "<<recombVector[i]<<" ";\r
817            cout<<"("<<pieces<<")"<<endl;\r
818            cout<<"*** END OF RECOMBINATION RATES PROFILE ***"<<endl;\r
819                 \r
820 }\r
821 \r
822 void drawtrees(){\r
823         \r
824         map<int,string > tree;\r
825         ostringstream  temp;\r
826         \r
827                            \r
828         for(int i=0;i<L;i++){\r
829                 tree.clear();\r
830                 \r
831                 for(int j=0;j<n;j++){\r
832                         if(tree[genepool[j*L+i]]!=""){\r
833                                 temp.str("");\r
834                                 temp <<tree[genepool[j*L+i]]<<","<<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
835                                 tree[genepool[j*L+i]]=  temp.str();\r
836                         }\r
837                         else{\r
838                                 temp.str("");\r
839                                 temp <<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
840                                 tree[genepool[j*L+i]]=  temp.str();\r
841                         }\r
842                 }\r
843                 \r
844         \r
845                 for(int j=n*L;j<nextFree;j++){\r
846                         if(tree[j]!=""){\r
847                                 if(genepool[j]==0){\r
848                                         cout<<"The tree for fragment "<<i+1<<"= ("<<tree[j]<<");"<<endl;\r
849                                 }\r
850                                 else{                                                   \r
851                                         if(tree[genepool[j]]!=""){\r
852                                                 temp.str("");\r
853                                                 temp <<tree[genepool[j]]<<",("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
854                                                 tree[genepool[j]]=      temp.str();\r
855                                         }\r
856                                         else{\r
857                                                 temp.str("");\r
858                                                 temp <<"("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
859                                                 tree[genepool[j]]=temp.str();\r
860                                         }\r
861                                 }\r
862                         }\r
863                 }\r
864         }\r
865         \r
866         tree.clear();\r
867 }\r
868 \r
869 \r
870 \r
871 \r
872 void genome(string popSize,                                             // effective population size of each population\r
873                   int nSubPOP,                                                  // number of populations\r
874                   vector<int> & nSubSample,                     // number of samples (haplotypes) draw from each populations\r
875                   int numPieces,                                                // number of fragments for each sample (chromosome)\r
876                   int pieceLen,                                                 // length in base pair of each fragment\r
877                   int numIndepRegion,                                   // number of independent regions (independent chromosome)\r
878                   int s,                                                                // fixed number of SNPs want to simulate, randomly place s SNPs on the genealogy, -1 means the number of mutation follows a Poisson distribution \r
879                   string rec,                                                   // recombination rate between consecutive fragments per generation\r
880                   double mut,                                                   // mutation rate per generation per base pair\r
881                   double mig,                                                   // migration rate per generation\r
882                   vector< vector<bool> > &return_chromosome, // the return chromosome by connecting all independent chromosome into one long chromosome\r
883                   long t,                                                               // random seed \r
884                   int drawtree,                                         // drawtree=1 output the genealogy trees for each fragment and chromosome; drawtree=0 do not output the trees   \r
885                   bool printparameters                                  // =1 print out all input parameters, =0 do not print input parameters\r
886                   )\r
887 {   \r
888    if(atoi(popSize.c_str())>0){                 // if the population size is fixed during evoluation\r
889            N = atoi(popSize.c_str())*nSubPOP;\r
890            maxN = N;\r
891            \r
892            fixedPOP = true;\r
893            \r
894            popProfile.clear();\r
895            struct popProfilesStruct temp_popProfile;\r
896            popProfile.push_back(temp_popProfile);\r
897            \r
898            popProfile[0].generation = 0;\r
899            \r
900            for(int i=0;i<nSubPOP;i++)popProfile[0].popsize.push_back(atoi(popSize.c_str()));\r
901            for(int i=0;i<nSubPOP;i++)popProfile[0].popStart.push_back(i*atoi(popSize.c_str()));\r
902            \r
903            cout<<"*** POPULATION PROFILE ***"<<endl;\r
904            cout<<"The size of each population is fixed to "<<atoi(popSize.c_str())<<endl;\r
905            cout<<"Sizes of populations = ";\r
906            for(unsigned int j=0;j<popProfile[0].popsize.size();j++)cout<<popProfile[0].popsize[j]<<" ";\r
907            cout<<endl;\r
908 //         cout<<"POP Start index = ";\r
909 //         for(int j=0;j<popProfile[0].popStart.size();j++)cout<<popProfile[0].popStart[j]<<" ";\r
910 //         cout<<endl;\r
911            cout<<"*** END OF POPULATION PROFILE ***"<<endl<<endl;\r
912                         \r
913    }\r
914    else{                                                                        // popSize stores the filename of the population profile during evoluation\r
915            fixedPOP = false;      \r
916         \r
917            currentProfile = 0;\r
918            \r
919            readPopProfile(popSize,nSubPOP);\r
920            \r
921            N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
922            \r
923            // print out the population profile information.\r
924            \r
925            cout<<"*** POPULATION PROFILE ***"<<endl;\r
926            cout<<"Maximum total size of populations at all generations: N = "<<maxN<<endl;\r
927            cout<<"Total size of populations at generation 0: N = "<<N<<endl<<endl;\r
928            \r
929            for(unsigned int i=0;i<popProfile.size();i++){\r
930                         cout<<"Generation = "<<popProfile[i].generation<<endl;\r
931                         cout<<"Sizes of populations= ";\r
932                         for(unsigned int j=0;j<popProfile[i].popsize.size();j++)cout<<popProfile[i].popsize[j]<<" ";\r
933                         cout<<endl;\r
934 //                      cout<<"POP Start index = ";\r
935 //                      for(int j=0;j<popProfile[i].popStart.size();j++)cout<<popProfile[i].popStart[j]<<" ";\r
936 //                      cout<<endl;\r
937                         if(popProfile[i].popChange.size()>0){\r
938                                 cout<<"Populations change backward in time:"<<endl;\r
939                                 for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
940                                         cout<<" From "<<j+1<<" to ";\r
941                                         for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)cout<<popProfile[i].popChange[j][k]+1<<" ";\r
942                                         cout<<endl;\r
943                                 }\r
944                                 \r
945 //                              cout<<"Selection cumulative probability:"<<endl;   \r
946 //                              for(int j=0;j<popProfile[i].selectProb.size();j++){\r
947 //                                      cout<<" From "<<j+1<<" with prob = ";\r
948 //                                      for(int k=0;k<popProfile[i].selectProb[j].size();k++)cout<<popProfile[i].selectProb[j][k]<<" ";\r
949 //                                      cout<<endl;\r
950 //                              }\r
951                         }\r
952                         if(i==popProfile.size()-1)cout<<"*** END OF POPULATION PROFILE ***"<<endl;\r
953                         cout<<endl;\r
954            }\r
955    }\r
956    \r
957    n=0;\r
958    nPOP=nSubPOP;\r
959    nSample.resize(nPOP);\r
960    for(int i=0;i<nPOP;i++){\r
961            if(popProfile[0].popsize[i]<nSubSample[i])error("Population size should be larger than sample size!");\r
962            n += nSubSample[i];\r
963            nSample[i]=nSubSample[i];\r
964    }\r
965    \r
966    L = numPieces;\r
967    length = pieceLen;\r
968    numChr = numIndepRegion;\r
969    SNP=s;\r
970    \r
971    if(atof(rec.c_str())>0){                             // if the recombination rate is fixed\r
972            recombination = atof(rec.c_str());\r
973    }\r
974    else{                                                // the distribution of recombination rate is described in the file (filename stored in rec)\r
975            recombination = -1;\r
976            readRecombination(rec,L);\r
977    }\r
978    \r
979    \r
980    \r
981    mutation = mut;\r
982    migration = mig;\r
983    \r
984    if(t<=0)t=(long)(time(0)); // if t >0 we set the seed to t\r
985    cout<<endl<<"random seed="<<t<<endl;\r
986    globalRandom.Reset(t);       \r
987    \r
988    if(printparameters){\r
989            printf("popSize=%s, n=%d, nPOP=%d, L=%d, length=%d, numChr=%d, SNP=%d, rec=%s, mut=%.4e, mig=%.4e, drawtree=%d, printparameters=%d\n",popSize.c_str(),n,nPOP,L,length,numChr,SNP,rec.c_str(),mut,mig,drawtree,printparameters);\r
990            cout<<"subSample = ";\r
991            for(int i=0;i<nPOP;i++){\r
992                    cout<<nSample[i]<<" ";\r
993            }\r
994            cout<<endl;\r
995    }\r
996    \r
997    if(N<=0 || n<=0 || L<=0 || length <=0 || numChr <=0 || atof(rec.c_str()) <0 || mut <0 || mig<0){\r
998            cout<<"Input parameters have to be positive."<<endl;\r
999            printf("N=%d, n=%d, L=%d, length=%d, numChr=%d, recombination=%s, mutation=%lf, migration=%lf\n",N,n,L,length,numChr,rec.c_str(),mut,mig);\r
1000            exit(0);\r
1001    }\r
1002 \r
1003    poolsize = 2 * n * L - L;\r
1004 \r
1005    AllocateMemory(return_chromosome);\r
1006 \r
1007 //    cout<<"memory OK!"<<endl<<endl;\r
1008    \r
1009         for(int chr=0;chr<numChr;chr++){\r
1010                 \r
1011            Initialize();\r
1012            \r
1013 //         cout<<"Initialization OK!"<<endl;\r
1014         \r
1015            bool done = false;\r
1016            int  generation = 0, units = n * L;\r
1017            \r
1018            while (!done)\r
1019               {\r
1020         //       printf("\n\nGeneration %d -- Tracking %d units of sequence [pool used %.2f%%]   %c",\r
1021         //              generation++, units, nextFree * 100. / poolsize,\r
1022         //              !debug ? '\r' : '\n');\r
1023         \r
1024                   generation++;\r
1025                   \r
1026                   if(!fixedPOP && currentProfile<popProfile.size()-1)\r
1027                                 if(generation == popProfile[currentProfile+1].generation)noChange=false;\r
1028                   \r
1029               //if (debug)\r
1030               //   {\r
1031               //   printf("F  : ");\r
1032               //   for (int i = 0; i < L; i++)\r
1033               //      printf("%3d ", fragments[i]);\r
1034               //   printf("\n");\r
1035               //   }\r
1036         \r
1037               // Position of the first segment allocated to the current generation\r
1038               int generationStart = nextFree;\r
1039         \r
1040               // For each individual in the population\r
1041               for (int i = 0; i < N && !done; i++)\r
1042                  if (child_flags[i])\r
1043                     {\r
1044                             \r
1045                     /*if (debug) printf("%2d : ", i);*/\r
1046         \r
1047                     // The position we are currently tracking and its distance\r
1048                     // from the previous position\r
1049                     int distance = 0, parent = -1;\r
1050                     int startPiece = 0;\r
1051                     float recombProb;\r
1052            \r
1053                             for (int block = child_First[i]; block <= child_Last[i]; block++)\r
1054                 if (children[i][block] == NULL)\r
1055                    distance += BLOCKSIZE;\r
1056                 else\r
1057                    {\r
1058                    int pos = BLOCKSIZE * block;\r
1059                    int pos_in_block;\r
1060                                    int pos_bound = (L < BLOCKSIZE * (block + 1) ? L : BLOCKSIZE * (block + 1) );\r
1061 \r
1062                    while (pos <  pos_bound )\r
1063                        {\r
1064                                \r
1065                            pos_in_block = pos % BLOCKSIZE;\r
1066                        // Skip through uninformative positions along each chromosome\r
1067                        if (children[i][block][pos_in_block] == -1)\r
1068                           {\r
1069                           //if (debug) printf("  . \n");\r
1070                           distance++;\r
1071                           pos++;\r
1072                           continue;\r
1073                           }\r
1074                          \r
1075                        // Pick an ancestor at random for the current piece of chromosome\r
1076                        \r
1077                        if(recombination>0){                     // if the recombination rate is fixed over the genome\r
1078                                recombProb = (float)(pow(1.0 - recombination, distance));\r
1079                        }\r
1080                        else{                                            // if the recombination rate is specified by the profile\r
1081                                recombProb = 1;\r
1082                                for(int recV_iter=startPiece;recV_iter<pos;recV_iter++)recombProb *= (1-recombVector[recV_iter]);\r
1083                        }\r
1084                        \r
1085                     \r
1086                        if (parent == -1 ||\r
1087                            distance > 0 && globalRandom.Next() > recombProb){\r
1088                                     parent = SampleParent(i, parent);\r
1089                         }\r
1090                             \r
1091                        \r
1092                        TouchParent(parent, pos);\r
1093                        \r
1094                        \r
1095                        // Reset distance between segments\r
1096                        distance = 1;\r
1097                        startPiece=pos;\r
1098 \r
1099                        //if (debug) printf("parent=%3d pos=%d address=%d address2=%d\n", parent,pos,parents[parent][block],parents[parent][pos/BLOCKSIZE]);\r
1100                        \r
1101                        // Check if we sampled a new ancestral segment ...\r
1102                        if (parents[parent][block][pos_in_block] == -1)\r
1103                           {\r
1104                           // We delay sampling a new gene from the pool until we know\r
1105                           // a coalescent event occured ...\r
1106                           parents[parent][block][pos_in_block] = children[i][block][pos_in_block];\r
1107                           \r
1108                           }\r
1109                        // Or if a coalescent event occured\r
1110                        else\r
1111                           {\r
1112                           // If we previously delayed sampling this parent\r
1113                           if (parents[parent][block][pos_in_block] < generationStart)\r
1114                             {\r
1115                                     genetimes[nextFree] = generation;\r
1116                                     genepool[parents[parent][block][pos_in_block]] = nextFree;\r
1117                                     parents[parent][block][pos_in_block] = nextFree++;\r
1118 \r
1119                                     if (nextFree == poolsize && units != 2)\r
1120                                        error("Genepool exhausted\n");\r
1121                             }\r
1122 \r
1123                           genepool[children[i][block][pos_in_block]] = parents[parent][block][pos_in_block];\r
1124                           \r
1125                           fragments[pos]--;\r
1126                           units--;\r
1127 \r
1128                          if (fragments[pos] == 1)\r
1129                             {\r
1130                             // Reached the MRCA for this fragment\r
1131                             MRCAtimes[pos] = generation;\r
1132                             MRCA[pos] = parents[parent][block][pos_in_block];\r
1133                             \r
1134                             genepool[parents[parent][block][pos_in_block]]=0;\r
1135                             \r
1136                             parents[parent][block][pos_in_block] = -1;\r
1137                             units--;\r
1138          \r
1139                             // One less segment to track\r
1140                             if (--activeSegments == 0)\r
1141                                {\r
1142                                done = true;\r
1143                                break;\r
1144                                }\r
1145                             }\r
1146                          }\r
1147                       pos++;\r
1148                       }\r
1149                    }\r
1150                     }\r
1151                     \r
1152               NewGeneration();\r
1153             \r
1154               }\r
1155         \r
1156            if(drawtree){\r
1157                    cout<<endl<<"Genealogy trees for chromosome:"<<chr+1<<endl;\r
1158                    drawtrees();\r
1159            }  \r
1160               \r
1161            // assign mutations\r
1162 \r
1163            cout<<"Simulate mutation for Chromosome "<<chr+1<<endl;\r
1164            \r
1165            FreeCoalescentMemory();\r
1166            \r
1167            cout<<"Coalescent Memory free"<<endl;\r
1168            \r
1169            InitializeMutation(return_chromosome);\r
1170 \r
1171            cout<<"Mutation process initialized"<<endl;\r
1172                 \r
1173 //          cout<<"genepool:\t";\r
1174 //              for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
1175 //              cout<<endl;\r
1176 //              \r
1177 //              cout<<"genetimes:\t";\r
1178 //              for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
1179 //              cout<<endl;\r
1180 //         \r
1181 //              cout<<"nextFree="<<nextFree<<endl;\r
1182                 \r
1183 //              cout<<"mutation:\t";\r
1184 //              for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
1185 //              cout<<endl;\r
1186                 \r
1187            if(SNP<=0){\r
1188                    for(int i=0;i<nextFree;i++){\r
1189                                 if(genepool[i]!=0)branchMutation[i]=poissonInt((double)length*(double)(genetimes[genepool[i]]-genetimes[i])*mutation);   \r
1190                    }\r
1191            }\r
1192            else{\r
1193                    unsigned long * cumulativeCount = new unsigned long [nextFree];\r
1194                    \r
1195                    if(genepool[0]!=0)cumulativeCount[0] = genetimes[genepool[0]]-genetimes[0];\r
1196                    else cumulativeCount[0] = 0;\r
1197                    \r
1198                    for(int i=1;i<nextFree;i++){\r
1199                                 if(genepool[i]!=0)cumulativeCount[i] = cumulativeCount[i-1] + genetimes[genepool[i]]-genetimes[i]; \r
1200                                 else cumulativeCount[i] = cumulativeCount[i-1];  \r
1201                    }\r
1202                    \r
1203                    unsigned long totalGeneration = cumulativeCount[nextFree - 1];\r
1204                    \r
1205 //                 cout<<"Total Generation = "<<totalGeneration<<endl;\r
1206 //                 \r
1207 //                 if((unsigned long)SNP>totalGeneration){\r
1208 //                              cout<<"Required number of SNPs("<<SNP<<") is more than total number of generations("<<totalGeneration<<")!"<<endl;   \r
1209 //                 }\r
1210                    \r
1211                    for(int i=0;i<SNP;i++){\r
1212                            \r
1213                            unsigned long randomLong = globalRandom.NextInt()%totalGeneration;\r
1214                            \r
1215                            int begin = 0;\r
1216                            int end = nextFree - 1;\r
1217                            int middle = (begin+end)/2;\r
1218                            \r
1219                            while(1){\r
1220                                    if(cumulativeCount[middle]<randomLong){\r
1221                                                 begin = middle;\r
1222                                    }\r
1223                                    else{\r
1224                                                 end = middle;\r
1225                                    }\r
1226                                    \r
1227                                    if(end == begin+1)break;\r
1228                                    \r
1229                                    middle = (begin+end)/2;\r
1230                            }\r
1231                            \r
1232                            if(cumulativeCount[end] < randomLong || (cumulativeCount[begin] >= randomLong && begin != 0)){\r
1233                                         cout<<"searching error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;   \r
1234                                         error("see above message!\n");\r
1235                            }\r
1236                            \r
1237                            int sampleIndex;\r
1238                            \r
1239                            if(cumulativeCount[begin]>=randomLong)sampleIndex = begin;\r
1240                            else sampleIndex = end;\r
1241                            \r
1242                            if(genepool[sampleIndex]==0){\r
1243                                    cout<<"genepool error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;   \r
1244                                    error("see above message!\n");\r
1245                            }\r
1246                            \r
1247                            branchMutation[sampleIndex]++;\r
1248                    }\r
1249                    \r
1250                    delete [] cumulativeCount;\r
1251                    \r
1252            }\r
1253    \r
1254            delete [] genetimes;\r
1255            \r
1256            cout<<"Mutation assigned"<<endl;\r
1257             \r
1258 //      cout<<"mutation:\t";\r
1259 //              for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
1260 //              cout<<endl;\r
1261 \r
1262            AllocateMutationMemory();\r
1263            \r
1264            cout<<"Mutation Memory allocated"<<endl;\r
1265                 \r
1266            PrepareMutation();\r
1267         \r
1268            for(int i=0;i<n;i++){\r
1269                         for(int j=0;j<L;j++){\r
1270                                 if(branchMutation[i*L+j]>0){\r
1271                                         geneIndex[i*L+j] = fragmentLength[j];\r
1272                                         fragmentLength[j] += branchMutation[i*L+j];\r
1273                                 }\r
1274                                 branchFragment[i*L+j] = j;\r
1275                         }\r
1276            }   \r
1277               \r
1278            \r
1279            for(int i=0;i<nextFree;i++){\r
1280                         if(genepool[genepool[i]]!=0 && genepool[i]!=0){\r
1281                            if(branchMutation[genepool[i]]>0 && branchFragment[genepool[i]]== -1 ){\r
1282                                    geneIndex[genepool[i]] = fragmentLength[branchFragment[i]];\r
1283                                    fragmentLength[branchFragment[i]] += branchMutation[genepool[i]];\r
1284                            }\r
1285                            branchFragment[genepool[i]]=branchFragment[i];\r
1286                         }\r
1287            }\r
1288            \r
1289            //cout<<"branchfragment done"<<endl;\r
1290            \r
1291 //      cout<<"branchFragment:\t";\r
1292 //              for(int i=0;i<nextFree;i++)cout<<branchFragment[i]<<" ";\r
1293 //              cout<<endl;\r
1294 \r
1295 //      cout<<"geneIndex:\t";\r
1296 //              for(int i=0;i<nextFree;i++)cout<<geneIndex[i]<<" ";\r
1297 //              cout<<endl;\r
1298 //      \r
1299 //              cout<<"fragmentLength:\t";\r
1300 //              for(int i=0;i<L;i++)cout<<fragmentLength[i]<<" ";\r
1301 //              cout<<endl;\r
1302                 \r
1303 \r
1304 //          cout<<"genepool genetime mutation frag index\n";\r
1305 //              for(int i=0;i<nextFree;i++)cout<<genepool[i]<<" "<<genetimes[i]<<" "<<branchMutation[i]<<" "<<branchFragment[i]<<" "<<geneIndex[i]<<endl;\r
1306 //              cout<<endl;\r
1307            \r
1308 \r
1309    \r
1310        delete [] branchFragment;\r
1311        \r
1312            cout<<"Internal memory free"<<endl;\r
1313            \r
1314            // print the fragment index for each SNP\r
1315            cout<<"SNP genetic position scaled to [0,1]:"<<endl;\r
1316            if(L>1){\r
1317                    for(int i=0;i<L;i++){\r
1318                                 for(int j=0;j<fragmentLength[i];j++)cout<<(float)i/(float)(L-1)<<" ";   \r
1319                    }\r
1320            }\r
1321            else{\r
1322                    cout<<"Only one fragment to be simulated. No recombination between SNPs.";\r
1323            }\r
1324            cout<<endl;\r
1325                    \r
1326            int totalLength=0;\r
1327            if(SNP <=0){ \r
1328                    for(int i=0;i<L;i++)totalLength += fragmentLength[i];\r
1329            }\r
1330            \r
1331            \r
1332            for(int i=0;i<n;i++){\r
1333                         for(int j=0;j<L;j++){\r
1334                                 chromosome[i][j].resize(fragmentLength[j],0);\r
1335 \r
1336                                 for(int k=0;k<branchMutation[i*L+j];k++)chromosome[i][j][geneIndex[i*L+j]+k] = 1;\r
1337                                 \r
1338                                 int current=i*L+j;\r
1339                 \r
1340                                 while(genepool[genepool[current]]!=0){\r
1341                                         for(int k=0;k<branchMutation[genepool[current]];k++)chromosome[i][j][geneIndex[genepool[current]]+k]=1;\r
1342                                         current=genepool[current];\r
1343                                 }\r
1344                         }\r
1345            }\r
1346            \r
1347            cout<<"Chromosome done"<<endl;\r
1348            \r
1349            FreeMutationMemory();\r
1350            \r
1351            if(SNP <=0){ \r
1352                    for(int i=0;i<n;i++){\r
1353                                 return_chromosome[i].reserve(return_chromosome[i].size()+totalLength);\r
1354                    }\r
1355            }\r
1356            \r
1357            for(int i=0;i<n;i++){\r
1358                         for(int j=0;j<L;j++){\r
1359                                 return_chromosome[i].insert(return_chromosome[i].end(),chromosome[i][j].begin(),chromosome[i][j].end());\r
1360                         }\r
1361            }     \r
1362            \r
1363            cout<<"Return chromosome done"<<endl;\r
1364            \r
1365            // print the chromosomes\r
1366 //         for(int i=0;i<n;i++){\r
1367 //                      printf("Chr %d: ",i);\r
1368 //                      for(int j=0;j<L;j++){\r
1369 //                              for(int k=0;k<chromosome[i][j].size();k++)cout<<chromosome[i][j][k];\r
1370 //                              cout<<" ";      \r
1371 //                      }\r
1372 //                      cout<<endl;\r
1373 //         }       \r
1374 //         \r
1375 //         \r
1376 //        for(int i=0;i<n;i++){\r
1377 //                      printf("return Chr %d: ",i);\r
1378 //                      for(int j=0;j<return_chromosome[i].size();j++){\r
1379 //                              cout<<return_chromosome[i][j];\r
1380 //                      }\r
1381 //                      cout<<endl;\r
1382 //         }\r
1383 \r
1384            \r
1385    \r
1386    }\r
1387    \r
1388    FreeMemory();\r
1389    \r
1390    printf("\nDone simulating ARG ...\n");\r
1391    \r
1392    \r
1393    \r
1394 }\r
1395 \r
1396 \r