1 //////////////////////////////////////////////////////////////////////
\r
3 //////////////////////////////////////////////////////////////////////////////
\r
4 // COPYRIGHT NOTICE FOR GENOME CODE
\r
6 // Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,
\r
8 // All rights reserved.
\r
10 // Redistribution and use in source and binary forms, with or without
\r
11 // modification, are permitted provided that the following conditions
\r
14 // 1. Redistributions of source code must retain the above copyright
\r
15 // notice, this list of conditions and the following disclaimer.
\r
17 // 2. Redistributions in binary form must reproduce the above copyright
\r
18 // notice, this list of conditions and the following disclaimer in the
\r
19 // documentation and/or other materials provided with the distribution.
\r
21 // 3. The names of its contributors may not be used to endorse or promote
\r
22 // products derived from this software without specific prior written
\r
25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
\r
26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
\r
27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
\r
28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
\r
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
\r
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
\r
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
\r
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
\r
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
\r
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
\r
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
37 ///////////////////////////////////////////////////////////////////////////////
\r
40 #define _CRT_SECURE_NO_DEPRECATE
\r
44 #include "stochastic.h"
\r
50 #include <algorithm>
\r
56 using namespace std;
\r
58 //static bool debug = false;
\r
59 static bool fixedPOP = true;
\r
60 static bool noChange = true;
\r
63 static int N = 10000, n = 100, L = 1, poolsize = 1024 * 1024 * 64, length=10000, numChr=1, SNP=-1; // length = number of base per fragment
\r
65 static unsigned int currentProfile;
\r
67 static vector<int> nSample(nPOP,3);
\r
68 static double recombination = 1e-8, migration = 0.0, mutation=1e-8;
\r
70 static int * genepool, * genetimes, * fragments, * MRCA, * MRCAtimes;
\r
71 static int *** children, *** parents, * cblocks, * pblocks;
\r
72 static int * parent_First, * parent_Last, * child_First, * child_Last;
\r
73 static bool * child_flags, * parent_flags;
\r
74 static int nextFree = 0, activeSegments = 0, pblockcounter = 0;
\r
75 static int numBLOCKs;
\r
76 static int BUFFER=1500;
\r
77 static int INCREMENT=1000;
\r
79 #define BLOCKSIZE 128
\r
80 #define SWAP(x,y) do {typeof(x) temp##x##y = x; x = y; y = temp##x##y; } while (0)
\r
82 static vector< vector< vector<bool> > > chromosome;
\r
83 static int * fragmentLength;
\r
84 static int * branchFragment;
\r
85 static int * geneIndex;
\r
86 static int * branchMutation;
\r
88 static float * recombVector=NULL;
\r
90 struct popProfilesStruct{
\r
92 vector<int> popsize;
\r
93 vector<int> popStart;
\r
94 vector< vector<float> > selectProb;
\r
95 vector< vector<int> > popChange;
\r
98 static vector<struct popProfilesStruct> popProfile;
\r
101 cout<<"\n*****************\nParents:\t";
\r
102 for(int i=0;i<N;i++){
\r
103 if(parent_flags[i]){
\r
104 for(int j=0;j<L;j++) cout<<"["<<((parents[i][j / BLOCKSIZE]==NULL)?999:(parents[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";
\r
110 cout<<"Children:\t";
\r
111 for(int i=0;i<N;i++){
\r
112 if(child_flags[i]){
\r
113 for(int j=0;j<L;j++) cout<<"["<<((children[i][j / BLOCKSIZE]==NULL)?999:(children[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";
\r
121 // cout<<"parent_flag:\t";
\r
122 // for(int i=0;i<N;i++){
\r
123 // cout<<parent_flags[i]<<"\t";
\r
127 // cout<<"children_flag:\t";
\r
128 // for(int i=0;i<N;i++){
\r
129 // cout<<child_flags[i]<<"\t";
\r
133 cout<<"genepool:\t";
\r
134 for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";
\r
137 cout<<"genetimes:\t";
\r
138 for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";
\r
142 for(int i=0;i<L;i++)cout<<MRCA[i]<<" ";
\r
145 cout<<"MCRAtime:\t";
\r
146 for(int i=0;i<L;i++)cout<<MRCAtimes[i]<<" ";
\r
149 cout<<"fragments:\t";
\r
150 for(int i=0;i<L;i++)cout<<fragments[i]<<" ";
\r
153 cout<<"NextFree="<<nextFree<<endl;
\r
156 int *** AllocateIntPtrMatrix(int rows, int columns){
\r
157 int *** result = new int ** [rows];
\r
159 for (int i = 0; i < rows; i++)
\r
160 result[i] = new int * [columns];
\r
165 void FreeIntPtrMatrix(int *** & matrix, int rows){
\r
167 for (int i = 0; i < rows; i++)
\r
168 delete [] matrix[i];
\r
175 void NewGeneration(){
\r
177 // for(int i=0;i<maxN;i++){
\r
178 // if(parent_flags[i])printf("numBlock=%d parent=%d first=%d last=%d\n",numBLOCKs,i,parent_First[i],parent_Last[i]);
\r
181 SWAP(children, parents);
\r
182 SWAP(child_First, parent_First);
\r
183 SWAP(child_Last, parent_Last);
\r
184 SWAP(child_flags, parent_flags);
\r
185 SWAP(cblocks, pblocks);
\r
189 nPOP = (int)(popProfile[currentProfile].popsize.size());
\r
190 N = popProfile[currentProfile].popStart.back()+popProfile[currentProfile].popsize.back();
\r
194 for (int i = 0; i < maxN; i++)
\r
195 parent_flags[i] = 0;
\r
197 // for (int i = 0; i < maxN; i++)
\r
198 // for (int j = 0; j < numBLOCKs; j++)
\r
199 // parents[i][j] = NULL;
\r
206 void AllocateMemory(vector< vector<bool> > &return_chromosome){
\r
210 // cout<<"Reserve size for return_chromosome="<<max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP))<<endl;
\r
213 // cout<<"Reserve size for return_chromosome="<<SNP*numChr<<endl;
\r
215 // cout<<"Reserve size for working chromosome="<<max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP))<<endl;
\r
218 return_chromosome.clear();
\r
219 return_chromosome.resize(n);
\r
220 // for(int i=0;i<n;i++){
\r
222 // //return_chromosome[i].reserve(max(1,40*(int)(mutation*(double)numChr*(double)L*(double)length*(double)maxN/(double)nPOP)));
\r
225 // //return_chromosome[i].reserve(SNP*numChr);
\r
229 chromosome.resize(n);
\r
230 for(int i=0;i<n;i++){
\r
231 chromosome[i].resize(L);
\r
232 // for(int j=0;j<L;j++){
\r
234 // //chromosome[i][j].reserve(max(1,40*(int)(mutation*(double)length*(double)maxN/(double)nPOP)));
\r
237 // //chromosome[i][j].reserve(SNP/L+1);
\r
242 genepool = new int [poolsize];
\r
244 numBLOCKs= (int)ceil((double)L / (double)BLOCKSIZE);
\r
251 void AllocateMutationMemory(){
\r
253 fragmentLength = new int [L];
\r
255 branchFragment = new int [poolsize];
\r
257 geneIndex = new int [poolsize];
\r
265 delete [] genepool;
\r
267 if(recombVector!=NULL)delete [] recombVector;
\r
272 void FreeMutationMemory(){
\r
274 delete [] fragmentLength;
\r
275 delete [] geneIndex;
\r
276 delete [] branchMutation;
\r
281 void FreeCoalescentMemory(){
\r
284 delete [] MRCAtimes;
\r
286 delete [] fragments;
\r
288 FreeIntPtrMatrix(children,maxN);
\r
290 FreeIntPtrMatrix(parents,maxN);
\r
292 delete [] child_flags;
\r
293 delete [] parent_flags;
\r
298 delete [] parent_First;
\r
299 delete [] parent_Last;
\r
301 delete [] child_First;
\r
302 delete [] child_Last;
\r
306 void reallocBlock(){
\r
308 pblocks = (int *)realloc(pblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );
\r
309 cblocks = (int *)realloc(cblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );
\r
310 BUFFER += INCREMENT;
\r
314 void setnull(int parent, int position){
\r
316 if (parent_flags[parent] == 0){
\r
317 parent_First[parent]=position / BLOCKSIZE;
\r
318 parent_Last[parent]=position / BLOCKSIZE;
\r
319 parents[parent][position / BLOCKSIZE] = NULL;
\r
321 else if(parent_First[parent]>position / BLOCKSIZE){
\r
322 for(int i=position / BLOCKSIZE;i<parent_First[parent];i++){
\r
323 parents[parent][i] = NULL;
\r
325 parent_First[parent]=position / BLOCKSIZE;
\r
327 else if(parent_Last[parent]<position / BLOCKSIZE){
\r
328 for(int i=position / BLOCKSIZE;i>parent_Last[parent];i--){
\r
329 parents[parent][i] = NULL;
\r
331 parent_Last[parent]=position / BLOCKSIZE;
\r
338 void TouchParent(int parent, int position){
\r
341 setnull(parent,position);
\r
343 if (parents[parent][position / BLOCKSIZE] != NULL)return;
\r
346 if (pblockcounter >= n*numBLOCKs+BUFFER){
\r
348 int * pblocks_old=pblocks;
\r
349 int * cblocks_old=cblocks;
\r
353 for(int i=0;i<N;i++)
\r
354 if(parent_flags[i])
\r
355 for(int j=parent_First[i];j<=parent_Last[i];j++)
\r
356 if(parents[i][j]!=NULL)parents[i][j] += pblocks - pblocks_old;
\r
358 for(int i=0;i<N;i++)
\r
360 for(int j=child_First[i];j<=child_Last[i];j++)
\r
361 if(children[i][j]!=NULL)children[i][j] += cblocks - cblocks_old;
\r
363 printf("pblock and cblock enlarged, BUFFER=%d.!\n",BUFFER);
\r
365 if(pblocks==NULL || cblocks==NULL)error("Blocks memory reallocation error.\n");
\r
368 parents[parent][position / BLOCKSIZE] = pblocks + BLOCKSIZE * pblockcounter++;
\r
370 for (int i = 0; i < BLOCKSIZE; i++)
\r
371 parents[parent] [position / BLOCKSIZE] [i] = -1;
\r
373 parent_flags[parent] = 1;
\r
379 genetimes = new int [poolsize];
\r
381 MRCA = new int [L];
\r
382 MRCAtimes = new int [L];
\r
384 fragments = new int [L];
\r
386 children = AllocateIntPtrMatrix(maxN, numBLOCKs);
\r
387 parents = AllocateIntPtrMatrix(maxN, numBLOCKs);
\r
389 parent_First = new int [maxN];
\r
390 parent_Last = new int [maxN];
\r
392 child_First = new int [maxN];
\r
393 child_Last = new int [maxN];
\r
395 cblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));
\r
396 pblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));
\r
398 child_flags = new bool [maxN];
\r
399 parent_flags = new bool [maxN];
\r
404 nPOP = (int)(popProfile[0].popsize.size());
\r
406 N = popProfile[0].popStart.back()+popProfile[0].popsize.back();
\r
408 currentProfile = 0;
\r
412 for (int i = 0; i < N; i++)
\r
413 child_flags[i] = 0;
\r
416 for(int k=0;k<nPOP;k++){
\r
417 for (int i = 0; i < nSample[k]; i++)
\r
419 child_First[popProfile[0].popStart[k]+i]=0;
\r
420 child_Last[popProfile[0].popStart[k]+i]=numBLOCKs-1;
\r
422 child_flags[popProfile[0].popStart[k]+i] = 1;
\r
424 for (int j = 0; j < numBLOCKs; j++)
\r
425 children[popProfile[0].popStart[k]+i][j] = cblocks + (count * L + j * BLOCKSIZE);
\r
427 for (int j = 0; j < L; j++)
\r
429 genepool[nextFree] = 1;
\r
430 genetimes[nextFree] = 0;
\r
431 children[popProfile[0].popStart[k]+i][j / BLOCKSIZE][j % BLOCKSIZE] = nextFree++;
\r
438 for (int i = 0; i < L; i++)
\r
441 for (int i = 0; i < N; i++)
\r
442 parent_flags[i] = 0;
\r
444 activeSegments = L;
\r
446 //for (int i = 0; i < N; i++)
\r
447 // for (int j = 0; j < numBLOCKs; j++)
\r
448 // parents[i][j] = NULL;
\r
454 void InitializeMutation(vector< vector<bool> > &return_chromosome){
\r
456 // return_chromosome.clear();
\r
457 // return_chromosome.resize(n);
\r
458 // for(int i=0;i<n;i++){
\r
459 // return_chromosome[i].reserve(max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP)));
\r
462 // chromosome.resize(n);
\r
463 // for(int i=0;i<n;i++){
\r
464 // chromosome[i].resize(L);
\r
465 // for(int j=0;j<L;j++){
\r
466 // chromosome[i][j].reserve(max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP)));
\r
470 // cout<<"WGCS-- chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;
\r
472 for(int i=0;i<n;i++){
\r
473 for(int j=0;j<L;j++){
\r
474 chromosome[i][j].clear();
\r
478 // cout<<"WGCS-- after clear() chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;
\r
480 branchMutation = new int [nextFree];
\r
481 for(int i=0;i<nextFree;i++)branchMutation[i]=0;
\r
485 void PrepareMutation(){
\r
487 for(int i=0;i<L;i++)fragmentLength[i]=0;
\r
489 for(int i=0;i<nextFree;i++){
\r
490 geneIndex[i] = -1;
\r
491 branchFragment[i] = -1;
\r
496 int SampleParent(int individual, int previous){
\r
499 int population = individual/popProfile[0].popsize[0];
\r
501 if (previous == -1 && nPOP>1){
\r
502 if (globalRandom.Next() < migration){
\r
503 int random = globalRandom.NextInt()%(nPOP-1);
\r
504 if(population<=random)population=random+1;
\r
505 else population=random;
\r
509 return globalRandom.NextInt() % (popProfile[0].popsize[0]) + popProfile[0].popStart[population];
\r
516 for(int i=0;i<nPOP;i++){
\r
517 if(individual>=popProfile[currentProfile].popStart[i])population=i;
\r
521 // cout<<"No change: ind="<<individual<<" pop="<<population<<endl;
\r
523 if (previous == -1 && nPOP>1){
\r
524 if (globalRandom.Next() < migration){
\r
525 int random = globalRandom.NextInt()%(nPOP-1);
\r
526 if(population<=random)population=random+1;
\r
527 else population=random;
\r
531 // cout<<"new pop="<<population<<endl;
\r
533 return globalRandom.NextInt() % (popProfile[currentProfile].popsize[population]) + popProfile[currentProfile].popStart[population];
\r
538 for(int i=0;i<nPOP;i++){
\r
539 if(individual>=popProfile[currentProfile].popStart[i])population=i;
\r
542 // cout<<"Change: ind="<<individual<<" pop="<<population<<endl;
\r
544 if (previous == -1 && nPOP>1){
\r
545 if (globalRandom.Next() < migration){
\r
546 int random = globalRandom.NextInt()%(nPOP-1);
\r
547 if(population<=random)population=random+1;
\r
548 else population=random;
\r
552 // cout<<"new pop="<<population<<endl;
\r
554 if(popProfile[currentProfile].popChange[population].size()==1){
\r
555 // cout<<"To next pop="<<popProfile[currentProfile].popChange[population][0]<<endl;
\r
557 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][0]])
\r
558 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][0]];
\r
561 double selectRandom = globalRandom.Next();
\r
562 for(unsigned int i=0;i<popProfile[currentProfile].popChange[population].size();i++){
\r
563 if(selectRandom < popProfile[currentProfile].selectProb[population][i]){
\r
565 // cout<<"To next pop="<<popProfile[currentProfile].popChange[population][i]<<endl;
\r
567 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][i]])
\r
568 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][i]];
\r
571 cout<<"Random number generator error!"<<endl;
\r
581 void readPopProfile(string filename, unsigned int numPopulation){
\r
583 int popProfile_index=0;
\r
585 popProfile.clear();
\r
586 struct popProfilesStruct temp_popProfile;
\r
587 popProfile.push_back(temp_popProfile);
\r
594 popFile.open(filename.c_str());
\r
595 if(!popFile)error("Population profile: %s cannot be opened!",filename.c_str());
\r
597 getline(popFile,fileline); // the first line is the population sizes at generation 0
\r
599 term = strtok ((char*)fileline.c_str()," \t"); // the first term is the generation
\r
600 popProfile[popProfile_index].generation=atoi(term);
\r
602 if(popProfile[popProfile_index].generation!=0)error("The first line in population profile should be generation 0!");
\r
604 term = strtok (NULL, " \t"); // the second term and after are the sizes of that population
\r
606 popProfile[popProfile_index].popsize.push_back(atoi(term));
\r
607 term = strtok (NULL, " \t");
\r
610 popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0
\r
611 for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){
\r
612 popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());
\r
615 maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();
\r
617 if(popProfile[0].popsize.size()!=numPopulation)error("The number of populations specified by -pop is not consistent with generation 0 in population profile:%s",filename.c_str());
\r
619 // read in the population correspondence to the next population profile
\r
622 while(popFile.peek()!=EOF){
\r
623 getline(popFile,fileline); // the even number lines are the correspondence of populations of different generation
\r
624 popProfile[popProfile_index].popChange.resize(popProfile[popProfile_index].popsize.size());
\r
626 term = strtok ((char*)fileline.c_str(),"- \t"); // the FROM population
\r
629 term1 = strtok (NULL, "- \t"); // the TO population
\r
631 popProfile[popProfile_index].popChange[atoi(term)-1].push_back(atoi(term1)-1);
\r
633 term = strtok (NULL, "- \t");
\r
636 popProfile_index++;
\r
637 popProfile.push_back(temp_popProfile);
\r
639 if(popFile.peek()==EOF)error("Error in population profile: the last line should specify the population sizes.");
\r
641 // read the next population profile
\r
643 getline(popFile,fileline); // the population sizes
\r
645 term = strtok ((char*)fileline.c_str()," \t"); // the first term is the generation
\r
646 popProfile[popProfile_index].generation=atoi(term);
\r
648 term = strtok (NULL, " \t"); // the second term and after are the sizes of that population
\r
650 popProfile[popProfile_index].popsize.push_back(atoi(term));
\r
651 term = strtok (NULL, " \t");
\r
654 popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0
\r
655 for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){
\r
656 popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());
\r
659 if(maxN < popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back())
\r
660 maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();
\r
663 for(unsigned int i=0;i<popProfile.size();i++){
\r
664 popProfile[i].selectProb.resize(popProfile[i].popChange.size());
\r
665 for(unsigned int j=0;j<popProfile[i].popChange.size();j++){
\r
667 for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)sum += popProfile[i+1].popsize[popProfile[i].popChange[j][k]];
\r
669 popProfile[i].selectProb[j].push_back(((float)popProfile[i+1].popsize[popProfile[i].popChange[j][0]])/sum);
\r
670 for(unsigned int k=1;k<popProfile[i].popChange[j].size();k++)
\r
671 popProfile[i].selectProb[j].push_back(popProfile[i].selectProb[j].back()+popProfile[i+1].popsize[popProfile[i].popChange[j][k]]/sum);
\r
674 // check for consistence
\r
678 for(unsigned int i=0;i<popProfile.size()-1;i++){
\r
680 for(unsigned int j=0;j<popProfile[i].popChange.size();j++){
\r
681 if(popProfile[i].popChange[j].size()==0)error("One population does not have its parent population: profile file line %d, population %d\n",i+1,j+1);
\r
682 for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++){
\r
683 if(min>popProfile[i].popChange[j][k])min=popProfile[i].popChange[j][k];
\r
684 if(max<popProfile[i].popChange[j][k])max=popProfile[i].popChange[j][k];
\r
687 if(min!=0 || max!=(int)(popProfile[i+1].popsize.size()-1))
\r
688 error("The population changes does not consistent with the next population profile. line=%d, min=%d, max=%d, num POP in next line=%d\n",i+1,min+1,max+1,popProfile[i+1].popsize.size());
\r
694 void readRecombination(string filename, int pieces){
\r
696 ifstream recombFile;
\r
700 vector<float> recDistribution;
\r
701 vector<float> recRate;
\r
706 recombFile.open(filename.c_str());
\r
707 if(!recombFile)error("Recombination profile: %s cannot be opened!",filename.c_str());
\r
709 getline(recombFile,fileline); // the first line is head of distribution table
\r
710 term = strtok ((char*)fileline.c_str()," \t");
\r
711 term = strtok (NULL, " \t");
\r
713 if(strcmp(term,"Freq")==0 || strcmp(term,"freq")==0){
\r
714 while(recombFile.peek()!=EOF){
\r
715 getline(recombFile,fileline);
\r
717 term = strtok ((char*)fileline.c_str()," \t"); // the first number is the recombination rate
\r
718 recRate.push_back((float)(atof(term)));
\r
720 if(atof(term)<0 || atof(term)>1)error("Recombination rate should be between 0 and 1. input=%f",atof(term));
\r
722 term = strtok (NULL, " \t"); // the second number is the frequency of recombination rate
\r
723 recDistribution.push_back((float)(atof(term)));
\r
725 if(atof(term)<=0)error("Frequency should be positive. input=%f",atof(term));
\r
727 sum += (float)(atof(term));
\r
730 if(recRate.size()!=recDistribution.size() || recRate.size()==0)error("Errors in the recombination profile: %s",filename.c_str());
\r
732 // normalize the frequency
\r
733 for(unsigned int i=0;i<recDistribution.size();i++) recDistribution[i] /= sum;
\r
735 // compute the cumulative distribution
\r
736 for(unsigned int i=1;i<recDistribution.size();i++) recDistribution[i] += recDistribution[i-1];
\r
738 recombVector = new float [pieces-1];
\r
740 for(int i=0;i<pieces-1;i++){
\r
741 random = (float)(globalRandom.Next());
\r
742 for(unsigned int j=0;j<recDistribution.size();j++){
\r
743 if(random < recDistribution[j]){
\r
744 recombVector[i]=recRate[j];
\r
751 cout<<"*** RECOMBINATION RATES PROFILE ***"<<endl;
\r
752 cout<<"Recombination_Rate\tCumulative_frequency"<<endl;
\r
753 for(unsigned int i=0;i<recRate.size();i++){
\r
754 cout<<recRate[i]<<"\t\t\t"<<recDistribution[i]<<endl;
\r
758 else if(strcmp(term,"Pos")==0 || strcmp(term,"pos")==0){
\r
759 float currentRec=0.0,nextRec=0.0;
\r
762 if(recombFile.peek()!=EOF){
\r
763 getline(recombFile,fileline);
\r
764 term = strtok ((char*)fileline.c_str()," \t");
\r
765 currentRec=(float)(atof(term));
\r
767 if(recombFile.peek()!=EOF){
\r
768 getline(recombFile,fileline);
\r
769 term = strtok ((char*)fileline.c_str()," \t");
\r
770 nextRec=(float)(atof(term));
\r
771 term = strtok (NULL, " \t");
\r
772 nextPos=atoi(term);
\r
780 error("Recombination file does not contain recombination rate information.\n");
\r
783 recombVector = new float [pieces-1];
\r
785 for(int i=0;i<pieces-1;i++){
\r
787 currentRec=nextRec;
\r
788 if(recombFile.peek()!=EOF){
\r
789 getline(recombFile,fileline);
\r
790 term = strtok ((char*)fileline.c_str()," \t");
\r
791 nextRec=(float)(atof(term));
\r
792 term = strtok (NULL, " \t");
\r
793 nextPos=atoi(term);
\r
800 recombVector[i]=currentRec;
\r
807 error("The second column is not \"freq\" or \"pos\".\n");
\r
813 cout<<"The recombination rates between fragments are:"<<endl<<"\t";
\r
814 for(int i=0;i<pieces-1;i++)cout<<"("<<i+1<<") "<<recombVector[i]<<" ";
\r
815 cout<<"("<<pieces<<")"<<endl;
\r
816 cout<<"*** END OF RECOMBINATION RATES PROFILE ***"<<endl;
\r
822 map<int,string > tree;
\r
823 ostringstream temp;
\r
826 for(int i=0;i<L;i++){
\r
829 for(int j=0;j<n;j++){
\r
830 if(tree[genepool[j*L+i]]!=""){
\r
832 temp <<tree[genepool[j*L+i]]<<","<<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];
\r
833 tree[genepool[j*L+i]]= temp.str();
\r
837 temp <<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];
\r
838 tree[genepool[j*L+i]]= temp.str();
\r
843 for(int j=n*L;j<nextFree;j++){
\r
845 if(genepool[j]==0){
\r
846 cout<<"The tree for fragment "<<i+1<<"= ("<<tree[j]<<");"<<endl;
\r
849 if(tree[genepool[j]]!=""){
\r
851 temp <<tree[genepool[j]]<<",("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];
\r
852 tree[genepool[j]]= temp.str();
\r
856 temp <<"("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];
\r
857 tree[genepool[j]]=temp.str();
\r
870 void genome(string popSize, // effective population size of each population
\r
871 int nSubPOP, // number of populations
\r
872 vector<int> & nSubSample, // number of samples (haplotypes) draw from each populations
\r
873 int numPieces, // number of fragments for each sample (chromosome)
\r
874 int pieceLen, // length in base pair of each fragment
\r
875 int numIndepRegion, // number of independent regions (independent chromosome)
\r
876 int s, // fixed number of SNPs want to simulate, randomly place s SNPs on the genealogy, -1 means the number of mutation follows a Poisson distribution
\r
877 string rec, // recombination rate between consecutive fragments per generation
\r
878 double mut, // mutation rate per generation per base pair
\r
879 double mig, // migration rate per generation
\r
880 vector< vector<bool> > &return_chromosome, // the return chromosome by connecting all independent chromosome into one long chromosome
\r
881 long t, // random seed
\r
882 int drawtree, // drawtree=1 output the genealogy trees for each fragment and chromosome; drawtree=0 do not output the trees
\r
883 bool printparameters // =1 print out all input parameters, =0 do not print input parameters
\r
886 if(atoi(popSize.c_str())>0){ // if the population size is fixed during evoluation
\r
887 N = atoi(popSize.c_str())*nSubPOP;
\r
892 popProfile.clear();
\r
893 struct popProfilesStruct temp_popProfile;
\r
894 popProfile.push_back(temp_popProfile);
\r
896 popProfile[0].generation = 0;
\r
898 for(int i=0;i<nSubPOP;i++)popProfile[0].popsize.push_back(atoi(popSize.c_str()));
\r
899 for(int i=0;i<nSubPOP;i++)popProfile[0].popStart.push_back(i*atoi(popSize.c_str()));
\r
901 cout<<"*** POPULATION PROFILE ***"<<endl;
\r
902 cout<<"The size of each population is fixed to "<<atoi(popSize.c_str())<<endl;
\r
903 cout<<"Sizes of populations = ";
\r
904 for(unsigned int j=0;j<popProfile[0].popsize.size();j++)cout<<popProfile[0].popsize[j]<<" ";
\r
906 // cout<<"POP Start index = ";
\r
907 // for(int j=0;j<popProfile[0].popStart.size();j++)cout<<popProfile[0].popStart[j]<<" ";
\r
909 cout<<"*** END OF POPULATION PROFILE ***"<<endl<<endl;
\r
912 else{ // popSize stores the filename of the population profile during evoluation
\r
915 currentProfile = 0;
\r
917 readPopProfile(popSize,nSubPOP);
\r
919 N = popProfile[0].popStart.back()+popProfile[0].popsize.back();
\r
921 // print out the population profile information.
\r
923 cout<<"*** POPULATION PROFILE ***"<<endl;
\r
924 cout<<"Maximum total size of populations at all generations: N = "<<maxN<<endl;
\r
925 cout<<"Total size of populations at generation 0: N = "<<N<<endl<<endl;
\r
927 for(unsigned int i=0;i<popProfile.size();i++){
\r
928 cout<<"Generation = "<<popProfile[i].generation<<endl;
\r
929 cout<<"Sizes of populations= ";
\r
930 for(unsigned int j=0;j<popProfile[i].popsize.size();j++)cout<<popProfile[i].popsize[j]<<" ";
\r
932 // cout<<"POP Start index = ";
\r
933 // for(int j=0;j<popProfile[i].popStart.size();j++)cout<<popProfile[i].popStart[j]<<" ";
\r
935 if(popProfile[i].popChange.size()>0){
\r
936 cout<<"Populations change backward in time:"<<endl;
\r
937 for(unsigned int j=0;j<popProfile[i].popChange.size();j++){
\r
938 cout<<" From "<<j+1<<" to ";
\r
939 for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)cout<<popProfile[i].popChange[j][k]+1<<" ";
\r
943 // cout<<"Selection cumulative probability:"<<endl;
\r
944 // for(int j=0;j<popProfile[i].selectProb.size();j++){
\r
945 // cout<<" From "<<j+1<<" with prob = ";
\r
946 // for(int k=0;k<popProfile[i].selectProb[j].size();k++)cout<<popProfile[i].selectProb[j][k]<<" ";
\r
950 if(i==popProfile.size()-1)cout<<"*** END OF POPULATION PROFILE ***"<<endl;
\r
957 nSample.resize(nPOP);
\r
958 for(int i=0;i<nPOP;i++){
\r
959 if(popProfile[0].popsize[i]<nSubSample[i])error("Population size should be larger than sample size!");
\r
960 n += nSubSample[i];
\r
961 nSample[i]=nSubSample[i];
\r
966 numChr = numIndepRegion;
\r
969 if(atof(rec.c_str())>0){ // if the recombination rate is fixed
\r
970 recombination = atof(rec.c_str());
\r
972 else{ // the distribution of recombination rate is described in the file (filename stored in rec)
\r
973 recombination = -1;
\r
974 readRecombination(rec,L);
\r
982 if(t<=0)t=(long)(time(0)); // if t >0 we set the seed to t
\r
983 cout<<endl<<"random seed="<<t<<endl;
\r
984 globalRandom.Reset(t);
\r
986 if(printparameters){
\r
987 printf("popSize=%s, n=%d, nPOP=%d, L=%d, length=%d, numChr=%d, SNP=%d, rec=%s, mut=%.4e, mig=%.4e, drawtree=%d, printparameters=%d\n",popSize.c_str(),n,nPOP,L,length,numChr,SNP,rec.c_str(),mut,mig,drawtree,printparameters);
\r
988 cout<<"subSample = ";
\r
989 for(int i=0;i<nPOP;i++){
\r
990 cout<<nSample[i]<<" ";
\r
995 if(N<=0 || n<=0 || L<=0 || length <=0 || numChr <=0 || atof(rec.c_str()) <0 || mut <0 || mig<0){
\r
996 cout<<"Input parameters have to be positive."<<endl;
\r
997 printf("N=%d, n=%d, L=%d, length=%d, numChr=%d, recombination=%s, mutation=%lf, migration=%lf\n",N,n,L,length,numChr,rec.c_str(),mut,mig);
\r
1001 poolsize = 2 * n * L - L;
\r
1003 AllocateMemory(return_chromosome);
\r
1005 // cout<<"memory OK!"<<endl<<endl;
\r
1007 for(int chr=0;chr<numChr;chr++){
\r
1011 // cout<<"Initialization OK!"<<endl;
\r
1013 bool done = false;
\r
1014 int generation = 0, units = n * L;
\r
1018 // printf("\n\nGeneration %d -- Tracking %d units of sequence [pool used %.2f%%] %c",
\r
1019 // generation++, units, nextFree * 100. / poolsize,
\r
1020 // !debug ? '\r' : '\n');
\r
1024 if(!fixedPOP && currentProfile<popProfile.size()-1)
\r
1025 if(generation == popProfile[currentProfile+1].generation)noChange=false;
\r
1029 // printf("F : ");
\r
1030 // for (int i = 0; i < L; i++)
\r
1031 // printf("%3d ", fragments[i]);
\r
1035 // Position of the first segment allocated to the current generation
\r
1036 int generationStart = nextFree;
\r
1038 // For each individual in the population
\r
1039 for (int i = 0; i < N && !done; i++)
\r
1040 if (child_flags[i])
\r
1043 /*if (debug) printf("%2d : ", i);*/
\r
1045 // The position we are currently tracking and its distance
\r
1046 // from the previous position
\r
1047 int distance = 0, parent = -1;
\r
1048 int startPiece = 0;
\r
1051 for (int block = child_First[i]; block <= child_Last[i]; block++)
\r
1052 if (children[i][block] == NULL)
\r
1053 distance += BLOCKSIZE;
\r
1056 int pos = BLOCKSIZE * block;
\r
1058 int pos_bound = (L < BLOCKSIZE * (block + 1) ? L : BLOCKSIZE * (block + 1) );
\r
1060 while (pos < pos_bound )
\r
1063 pos_in_block = pos % BLOCKSIZE;
\r
1064 // Skip through uninformative positions along each chromosome
\r
1065 if (children[i][block][pos_in_block] == -1)
\r
1067 //if (debug) printf(" . \n");
\r
1073 // Pick an ancestor at random for the current piece of chromosome
\r
1075 if(recombination>0){ // if the recombination rate is fixed over the genome
\r
1076 recombProb = (float)(pow(1.0 - recombination, distance));
\r
1078 else{ // if the recombination rate is specified by the profile
\r
1080 for(int recV_iter=startPiece;recV_iter<pos;recV_iter++)recombProb *= (1-recombVector[recV_iter]);
\r
1084 if (parent == -1 ||
\r
1085 (distance > 0 && globalRandom.Next() > recombProb)){
\r
1086 parent = SampleParent(i, parent);
\r
1090 TouchParent(parent, pos);
\r
1093 // Reset distance between segments
\r
1097 //if (debug) printf("parent=%3d pos=%d address=%d address2=%d\n", parent,pos,parents[parent][block],parents[parent][pos/BLOCKSIZE]);
\r
1099 // Check if we sampled a new ancestral segment ...
\r
1100 if (parents[parent][block][pos_in_block] == -1)
\r
1102 // We delay sampling a new gene from the pool until we know
\r
1103 // a coalescent event occured ...
\r
1104 parents[parent][block][pos_in_block] = children[i][block][pos_in_block];
\r
1107 // Or if a coalescent event occured
\r
1110 // If we previously delayed sampling this parent
\r
1111 if (parents[parent][block][pos_in_block] < generationStart)
\r
1113 genetimes[nextFree] = generation;
\r
1114 genepool[parents[parent][block][pos_in_block]] = nextFree;
\r
1115 parents[parent][block][pos_in_block] = nextFree++;
\r
1117 if (nextFree == poolsize && units != 2)
\r
1118 error("Genepool exhausted\n");
\r
1121 genepool[children[i][block][pos_in_block]] = parents[parent][block][pos_in_block];
\r
1126 if (fragments[pos] == 1)
\r
1128 // Reached the MRCA for this fragment
\r
1129 MRCAtimes[pos] = generation;
\r
1130 MRCA[pos] = parents[parent][block][pos_in_block];
\r
1132 genepool[parents[parent][block][pos_in_block]]=0;
\r
1134 parents[parent][block][pos_in_block] = -1;
\r
1137 // One less segment to track
\r
1138 if (--activeSegments == 0)
\r
1155 cout<<endl<<"Genealogy trees for chromosome:"<<chr+1<<endl;
\r
1159 // assign mutations
\r
1161 cout<<"Simulate mutation for Chromosome "<<chr+1<<endl;
\r
1163 FreeCoalescentMemory();
\r
1165 cout<<"Coalescent Memory free"<<endl;
\r
1167 InitializeMutation(return_chromosome);
\r
1169 cout<<"Mutation process initialized"<<endl;
\r
1171 // cout<<"genepool:\t";
\r
1172 // for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";
\r
1175 // cout<<"genetimes:\t";
\r
1176 // for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";
\r
1179 // cout<<"nextFree="<<nextFree<<endl;
\r
1181 // cout<<"mutation:\t";
\r
1182 // for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";
\r
1186 for(int i=0;i<nextFree;i++){
\r
1187 if(genepool[i]!=0)branchMutation[i]=poissonInt((double)length*(double)(genetimes[genepool[i]]-genetimes[i])*mutation);
\r
1191 unsigned long * cumulativeCount = new unsigned long [nextFree];
\r
1193 if(genepool[0]!=0)cumulativeCount[0] = genetimes[genepool[0]]-genetimes[0];
\r
1194 else cumulativeCount[0] = 0;
\r
1196 for(int i=1;i<nextFree;i++){
\r
1197 if(genepool[i]!=0)cumulativeCount[i] = cumulativeCount[i-1] + genetimes[genepool[i]]-genetimes[i];
\r
1198 else cumulativeCount[i] = cumulativeCount[i-1];
\r
1201 unsigned long totalGeneration = cumulativeCount[nextFree - 1];
\r
1203 // cout<<"Total Generation = "<<totalGeneration<<endl;
\r
1205 // if((unsigned long)SNP>totalGeneration){
\r
1206 // cout<<"Required number of SNPs("<<SNP<<") is more than total number of generations("<<totalGeneration<<")!"<<endl;
\r
1209 for(int i=0;i<SNP;i++){
\r
1211 unsigned long randomLong = globalRandom.NextInt()%totalGeneration;
\r
1214 int end = nextFree - 1;
\r
1215 int middle = (begin+end)/2;
\r
1218 if(cumulativeCount[middle]<randomLong){
\r
1225 if(end == begin+1)break;
\r
1227 middle = (begin+end)/2;
\r
1230 if(cumulativeCount[end] < randomLong || (cumulativeCount[begin] >= randomLong && begin != 0)){
\r
1231 cout<<"searching error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;
\r
1232 error("see above message!\n");
\r
1237 if(cumulativeCount[begin]>=randomLong)sampleIndex = begin;
\r
1238 else sampleIndex = end;
\r
1240 if(genepool[sampleIndex]==0){
\r
1241 cout<<"genepool error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;
\r
1242 error("see above message!\n");
\r
1245 branchMutation[sampleIndex]++;
\r
1248 delete [] cumulativeCount;
\r
1252 delete [] genetimes;
\r
1254 cout<<"Mutation assigned"<<endl;
\r
1256 // cout<<"mutation:\t";
\r
1257 // for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";
\r
1260 AllocateMutationMemory();
\r
1262 cout<<"Mutation Memory allocated"<<endl;
\r
1264 PrepareMutation();
\r
1266 for(int i=0;i<n;i++){
\r
1267 for(int j=0;j<L;j++){
\r
1268 if(branchMutation[i*L+j]>0){
\r
1269 geneIndex[i*L+j] = fragmentLength[j];
\r
1270 fragmentLength[j] += branchMutation[i*L+j];
\r
1272 branchFragment[i*L+j] = j;
\r
1277 for(int i=0;i<nextFree;i++){
\r
1278 if(genepool[genepool[i]]!=0 && genepool[i]!=0){
\r
1279 if(branchMutation[genepool[i]]>0 && branchFragment[genepool[i]]== -1 ){
\r
1280 geneIndex[genepool[i]] = fragmentLength[branchFragment[i]];
\r
1281 fragmentLength[branchFragment[i]] += branchMutation[genepool[i]];
\r
1283 branchFragment[genepool[i]]=branchFragment[i];
\r
1287 //cout<<"branchfragment done"<<endl;
\r
1289 // cout<<"branchFragment:\t";
\r
1290 // for(int i=0;i<nextFree;i++)cout<<branchFragment[i]<<" ";
\r
1293 // cout<<"geneIndex:\t";
\r
1294 // for(int i=0;i<nextFree;i++)cout<<geneIndex[i]<<" ";
\r
1297 // cout<<"fragmentLength:\t";
\r
1298 // for(int i=0;i<L;i++)cout<<fragmentLength[i]<<" ";
\r
1302 // cout<<"genepool genetime mutation frag index\n";
\r
1303 // for(int i=0;i<nextFree;i++)cout<<genepool[i]<<" "<<genetimes[i]<<" "<<branchMutation[i]<<" "<<branchFragment[i]<<" "<<geneIndex[i]<<endl;
\r
1308 delete [] branchFragment;
\r
1310 cout<<"Internal memory free"<<endl;
\r
1312 // print the fragment index for each SNP
\r
1313 cout<<"SNP genetic position scaled to [0,1]:"<<endl;
\r
1315 for(int i=0;i<L;i++){
\r
1316 for(int j=0;j<fragmentLength[i];j++)cout<<(float)i/(float)(L-1)<<" ";
\r
1320 cout<<"Only one fragment to be simulated. No recombination between SNPs.";
\r
1324 int totalLength=0;
\r
1326 for(int i=0;i<L;i++)totalLength += fragmentLength[i];
\r
1330 for(int i=0;i<n;i++){
\r
1331 for(int j=0;j<L;j++){
\r
1332 chromosome[i][j].resize(fragmentLength[j],0);
\r
1334 for(int k=0;k<branchMutation[i*L+j];k++)chromosome[i][j][geneIndex[i*L+j]+k] = 1;
\r
1336 int current=i*L+j;
\r
1338 while(genepool[genepool[current]]!=0){
\r
1339 for(int k=0;k<branchMutation[genepool[current]];k++)chromosome[i][j][geneIndex[genepool[current]]+k]=1;
\r
1340 current=genepool[current];
\r
1345 cout<<"Chromosome done"<<endl;
\r
1347 FreeMutationMemory();
\r
1350 for(int i=0;i<n;i++){
\r
1351 return_chromosome[i].reserve(return_chromosome[i].size()+totalLength);
\r
1355 for(int i=0;i<n;i++){
\r
1356 for(int j=0;j<L;j++){
\r
1357 return_chromosome[i].insert(return_chromosome[i].end(),chromosome[i][j].begin(),chromosome[i][j].end());
\r
1361 cout<<"Return chromosome done"<<endl;
\r
1363 // print the chromosomes
\r
1364 // for(int i=0;i<n;i++){
\r
1365 // printf("Chr %d: ",i);
\r
1366 // for(int j=0;j<L;j++){
\r
1367 // for(int k=0;k<chromosome[i][j].size();k++)cout<<chromosome[i][j][k];
\r
1374 // for(int i=0;i<n;i++){
\r
1375 // printf("return Chr %d: ",i);
\r
1376 // for(int j=0;j<return_chromosome[i].size();j++){
\r
1377 // cout<<return_chromosome[i][j];
\r
1388 printf("\nDone simulating ARG ...\n");
\r