1 //////////////////////////////////////////////////////////////////////
\r
3 //////////////////////////////////////////////////////////////////////////////
\r
4 // COPYRIGHT NOTICE FOR GENOME CODE
\r
6 // Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,
\r
8 // All rights reserved.
\r
10 // Redistribution and use in source and binary forms, with or without
\r
11 // modification, are permitted provided that the following conditions
\r
14 // 1. Redistributions of source code must retain the above copyright
\r
15 // notice, this list of conditions and the following disclaimer.
\r
17 // 2. Redistributions in binary form must reproduce the above copyright
\r
18 // notice, this list of conditions and the following disclaimer in the
\r
19 // documentation and/or other materials provided with the distribution.
\r
21 // 3. The names of its contributors may not be used to endorse or promote
\r
22 // products derived from this software without specific prior written
\r
25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
\r
26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
\r
27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
\r
28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
\r
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
\r
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
\r
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
\r
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
\r
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
\r
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
\r
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
37 ///////////////////////////////////////////////////////////////////////////////
\r
40 #define _CRT_SECURE_NO_DEPRECATE
\r
44 #include "stochastic.h"
\r
50 #include <algorithm>
\r
55 using namespace std;
\r
57 //static bool debug = false;
\r
58 static bool fixedPOP = true;
\r
59 static bool noChange = true;
\r
62 static int N = 10000, n = 100, L = 1, poolsize = 1024 * 1024 * 64, length=10000, numChr=1, SNP=-1; // length = number of base per fragment
\r
64 static unsigned int currentProfile;
\r
66 static vector<int> nSample(nPOP,3);
\r
67 static double recombination = 1e-8, migration = 0.0, mutation=1e-8;
\r
69 static int * genepool, * genetimes, * fragments, * MRCA, * MRCAtimes;
\r
70 static int *** children, *** parents, * cblocks, * pblocks;
\r
71 static int * parent_First, * parent_Last, * child_First, * child_Last;
\r
72 static bool * child_flags, * parent_flags;
\r
73 static int nextFree = 0, activeSegments = 0, pblockcounter = 0;
\r
74 static int numBLOCKs;
\r
75 static int BUFFER=1500;
\r
76 static int INCREMENT=1000;
\r
78 #define BLOCKSIZE 128
\r
79 #define SWAP(tmp,a,b) tmp=a; a=b; b=tmp;
\r
81 static vector< vector< vector<bool> > > chromosome;
\r
82 static int * fragmentLength;
\r
83 static int * branchFragment;
\r
84 static int * geneIndex;
\r
85 static int * branchMutation;
\r
87 static float * recombVector=NULL;
\r
89 struct popProfilesStruct{
\r
91 vector<int> popsize;
\r
92 vector<int> popStart;
\r
93 vector< vector<float> > selectProb;
\r
94 vector< vector<int> > popChange;
\r
97 static vector<struct popProfilesStruct> popProfile;
\r
100 cout<<"\n*****************\nParents:\t";
\r
101 for(int i=0;i<N;i++){
\r
102 if(parent_flags[i]){
\r
103 for(int j=0;j<L;j++) cout<<"["<<((parents[i][j / BLOCKSIZE]==NULL)?999:(parents[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";
\r
109 cout<<"Children:\t";
\r
110 for(int i=0;i<N;i++){
\r
111 if(child_flags[i]){
\r
112 for(int j=0;j<L;j++) cout<<"["<<((children[i][j / BLOCKSIZE]==NULL)?999:(children[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";
\r
120 // cout<<"parent_flag:\t";
\r
121 // for(int i=0;i<N;i++){
\r
122 // cout<<parent_flags[i]<<"\t";
\r
126 // cout<<"children_flag:\t";
\r
127 // for(int i=0;i<N;i++){
\r
128 // cout<<child_flags[i]<<"\t";
\r
132 cout<<"genepool:\t";
\r
133 for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";
\r
136 cout<<"genetimes:\t";
\r
137 for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";
\r
141 for(int i=0;i<L;i++)cout<<MRCA[i]<<" ";
\r
144 cout<<"MCRAtime:\t";
\r
145 for(int i=0;i<L;i++)cout<<MRCAtimes[i]<<" ";
\r
148 cout<<"fragments:\t";
\r
149 for(int i=0;i<L;i++)cout<<fragments[i]<<" ";
\r
152 cout<<"NextFree="<<nextFree<<endl;
\r
155 int *** AllocateIntPtrMatrix(int rows, int columns){
\r
156 int *** result = new int ** [rows];
\r
158 for (int i = 0; i < rows; i++)
\r
159 result[i] = new int * [columns];
\r
164 void FreeIntPtrMatrix(int *** & matrix, int rows){
\r
166 for (int i = 0; i < rows; i++)
\r
167 delete [] matrix[i];
\r
174 void NewGeneration(){
\r
176 int *** ptrmatrix, * vector;
\r
179 // for(int i=0;i<maxN;i++){
\r
180 // if(parent_flags[i])printf("numBlock=%d parent=%d first=%d last=%d\n",numBLOCKs,i,parent_First[i],parent_Last[i]);
\r
183 SWAP(ptrmatrix, children, parents);
\r
184 SWAP(vector, child_First, parent_First);
\r
185 SWAP(vector, child_Last, parent_Last);
\r
186 SWAP(boolvector, child_flags, parent_flags);
\r
187 SWAP(vector, cblocks, pblocks);
\r
191 nPOP = (int)(popProfile[currentProfile].popsize.size());
\r
192 N = popProfile[currentProfile].popStart.back()+popProfile[currentProfile].popsize.back();
\r
196 for (int i = 0; i < maxN; i++)
\r
197 parent_flags[i] = 0;
\r
199 // for (int i = 0; i < maxN; i++)
\r
200 // for (int j = 0; j < numBLOCKs; j++)
\r
201 // parents[i][j] = NULL;
\r
208 void AllocateMemory(vector< vector<bool> > &return_chromosome){
\r
212 // cout<<"Reserve size for return_chromosome="<<max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP))<<endl;
\r
215 // cout<<"Reserve size for return_chromosome="<<SNP*numChr<<endl;
\r
217 // cout<<"Reserve size for working chromosome="<<max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP))<<endl;
\r
220 return_chromosome.clear();
\r
221 return_chromosome.resize(n);
\r
222 // for(int i=0;i<n;i++){
\r
224 // //return_chromosome[i].reserve(max(1,40*(int)(mutation*(double)numChr*(double)L*(double)length*(double)maxN/(double)nPOP)));
\r
227 // //return_chromosome[i].reserve(SNP*numChr);
\r
231 chromosome.resize(n);
\r
232 for(int i=0;i<n;i++){
\r
233 chromosome[i].resize(L);
\r
234 // for(int j=0;j<L;j++){
\r
236 // //chromosome[i][j].reserve(max(1,40*(int)(mutation*(double)length*(double)maxN/(double)nPOP)));
\r
239 // //chromosome[i][j].reserve(SNP/L+1);
\r
244 genepool = new int [poolsize];
\r
246 numBLOCKs= (int)ceil((double)L / (double)BLOCKSIZE);
\r
253 void AllocateMutationMemory(){
\r
255 fragmentLength = new int [L];
\r
257 branchFragment = new int [poolsize];
\r
259 geneIndex = new int [poolsize];
\r
267 delete [] genepool;
\r
269 if(recombVector!=NULL)delete [] recombVector;
\r
274 void FreeMutationMemory(){
\r
276 delete [] fragmentLength;
\r
277 delete [] geneIndex;
\r
278 delete [] branchMutation;
\r
283 void FreeCoalescentMemory(){
\r
286 delete [] MRCAtimes;
\r
288 delete [] fragments;
\r
290 FreeIntPtrMatrix(children,maxN);
\r
292 FreeIntPtrMatrix(parents,maxN);
\r
294 delete [] child_flags;
\r
295 delete [] parent_flags;
\r
300 delete [] parent_First;
\r
301 delete [] parent_Last;
\r
303 delete [] child_First;
\r
304 delete [] child_Last;
\r
308 void reallocBlock(){
\r
310 pblocks = (int *)realloc(pblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );
\r
311 cblocks = (int *)realloc(cblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );
\r
312 BUFFER += INCREMENT;
\r
316 void setnull(int parent, int position){
\r
318 if (parent_flags[parent] == 0){
\r
319 parent_First[parent]=position / BLOCKSIZE;
\r
320 parent_Last[parent]=position / BLOCKSIZE;
\r
321 parents[parent][position / BLOCKSIZE] = NULL;
\r
323 else if(parent_First[parent]>position / BLOCKSIZE){
\r
324 for(int i=position / BLOCKSIZE;i<parent_First[parent];i++){
\r
325 parents[parent][i] = NULL;
\r
327 parent_First[parent]=position / BLOCKSIZE;
\r
329 else if(parent_Last[parent]<position / BLOCKSIZE){
\r
330 for(int i=position / BLOCKSIZE;i>parent_Last[parent];i--){
\r
331 parents[parent][i] = NULL;
\r
333 parent_Last[parent]=position / BLOCKSIZE;
\r
340 void TouchParent(int parent, int position){
\r
343 setnull(parent,position);
\r
345 if (parents[parent][position / BLOCKSIZE] != NULL)return;
\r
348 if (pblockcounter >= n*numBLOCKs+BUFFER){
\r
350 int * pblocks_old=pblocks;
\r
351 int * cblocks_old=cblocks;
\r
355 for(int i=0;i<N;i++)
\r
356 if(parent_flags[i])
\r
357 for(int j=parent_First[i];j<=parent_Last[i];j++)
\r
358 if(parents[i][j]!=NULL)parents[i][j] += pblocks - pblocks_old;
\r
360 for(int i=0;i<N;i++)
\r
362 for(int j=child_First[i];j<=child_Last[i];j++)
\r
363 if(children[i][j]!=NULL)children[i][j] += cblocks - cblocks_old;
\r
365 printf("pblock and cblock enlarged, BUFFER=%d.!\n",BUFFER);
\r
367 if(pblocks==NULL || cblocks==NULL)error("Blocks memory reallocation error.\n");
\r
370 parents[parent][position / BLOCKSIZE] = pblocks + BLOCKSIZE * pblockcounter++;
\r
372 for (int i = 0; i < BLOCKSIZE; i++)
\r
373 parents[parent] [position / BLOCKSIZE] [i] = -1;
\r
375 parent_flags[parent] = 1;
\r
381 genetimes = new int [poolsize];
\r
383 MRCA = new int [L];
\r
384 MRCAtimes = new int [L];
\r
386 fragments = new int [L];
\r
388 children = AllocateIntPtrMatrix(maxN, numBLOCKs);
\r
389 parents = AllocateIntPtrMatrix(maxN, numBLOCKs);
\r
391 parent_First = new int [maxN];
\r
392 parent_Last = new int [maxN];
\r
394 child_First = new int [maxN];
\r
395 child_Last = new int [maxN];
\r
397 cblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));
\r
398 pblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));
\r
400 child_flags = new bool [maxN];
\r
401 parent_flags = new bool [maxN];
\r
406 nPOP = (int)(popProfile[0].popsize.size());
\r
408 N = popProfile[0].popStart.back()+popProfile[0].popsize.back();
\r
410 currentProfile = 0;
\r
414 for (int i = 0; i < N; i++)
\r
415 child_flags[i] = 0;
\r
418 for(int k=0;k<nPOP;k++){
\r
419 for (int i = 0; i < nSample[k]; i++)
\r
421 child_First[popProfile[0].popStart[k]+i]=0;
\r
422 child_Last[popProfile[0].popStart[k]+i]=numBLOCKs-1;
\r
424 child_flags[popProfile[0].popStart[k]+i] = 1;
\r
426 for (int j = 0; j < numBLOCKs; j++)
\r
427 children[popProfile[0].popStart[k]+i][j] = cblocks + (count * L + j * BLOCKSIZE);
\r
429 for (int j = 0; j < L; j++)
\r
431 genepool[nextFree] = 1;
\r
432 genetimes[nextFree] = 0;
\r
433 children[popProfile[0].popStart[k]+i][j / BLOCKSIZE][j % BLOCKSIZE] = nextFree++;
\r
440 for (int i = 0; i < L; i++)
\r
443 for (int i = 0; i < N; i++)
\r
444 parent_flags[i] = 0;
\r
446 activeSegments = L;
\r
448 //for (int i = 0; i < N; i++)
\r
449 // for (int j = 0; j < numBLOCKs; j++)
\r
450 // parents[i][j] = NULL;
\r
456 void InitializeMutation(vector< vector<bool> > &return_chromosome){
\r
458 // return_chromosome.clear();
\r
459 // return_chromosome.resize(n);
\r
460 // for(int i=0;i<n;i++){
\r
461 // return_chromosome[i].reserve(max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP)));
\r
464 // chromosome.resize(n);
\r
465 // for(int i=0;i<n;i++){
\r
466 // chromosome[i].resize(L);
\r
467 // for(int j=0;j<L;j++){
\r
468 // chromosome[i][j].reserve(max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP)));
\r
472 // cout<<"WGCS-- chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;
\r
474 for(int i=0;i<n;i++){
\r
475 for(int j=0;j<L;j++){
\r
476 chromosome[i][j].clear();
\r
480 // cout<<"WGCS-- after clear() chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;
\r
482 branchMutation = new int [nextFree];
\r
483 for(int i=0;i<nextFree;i++)branchMutation[i]=0;
\r
487 void PrepareMutation(){
\r
489 for(int i=0;i<L;i++)fragmentLength[i]=0;
\r
491 for(int i=0;i<nextFree;i++){
\r
492 geneIndex[i] = -1;
\r
493 branchFragment[i] = -1;
\r
498 int SampleParent(int individual, int previous){
\r
501 int population = individual/popProfile[0].popsize[0];
\r
503 if (previous == -1 && nPOP>1){
\r
504 if (globalRandom.Next() < migration){
\r
505 int random = globalRandom.NextInt()%(nPOP-1);
\r
506 if(population<=random)population=random+1;
\r
507 else population=random;
\r
511 return globalRandom.NextInt() % (popProfile[0].popsize[0]) + popProfile[0].popStart[population];
\r
518 for(int i=0;i<nPOP;i++){
\r
519 if(individual>=popProfile[currentProfile].popStart[i])population=i;
\r
523 // cout<<"No change: ind="<<individual<<" pop="<<population<<endl;
\r
525 if (previous == -1 && nPOP>1){
\r
526 if (globalRandom.Next() < migration){
\r
527 int random = globalRandom.NextInt()%(nPOP-1);
\r
528 if(population<=random)population=random+1;
\r
529 else population=random;
\r
533 // cout<<"new pop="<<population<<endl;
\r
535 return globalRandom.NextInt() % (popProfile[currentProfile].popsize[population]) + popProfile[currentProfile].popStart[population];
\r
540 for(int i=0;i<nPOP;i++){
\r
541 if(individual>=popProfile[currentProfile].popStart[i])population=i;
\r
544 // cout<<"Change: ind="<<individual<<" pop="<<population<<endl;
\r
546 if (previous == -1 && nPOP>1){
\r
547 if (globalRandom.Next() < migration){
\r
548 int random = globalRandom.NextInt()%(nPOP-1);
\r
549 if(population<=random)population=random+1;
\r
550 else population=random;
\r
554 // cout<<"new pop="<<population<<endl;
\r
556 if(popProfile[currentProfile].popChange[population].size()==1){
\r
557 // cout<<"To next pop="<<popProfile[currentProfile].popChange[population][0]<<endl;
\r
559 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][0]])
\r
560 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][0]];
\r
563 double selectRandom = globalRandom.Next();
\r
564 for(unsigned int i=0;i<popProfile[currentProfile].popChange[population].size();i++){
\r
565 if(selectRandom < popProfile[currentProfile].selectProb[population][i]){
\r
567 // cout<<"To next pop="<<popProfile[currentProfile].popChange[population][i]<<endl;
\r
569 return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][i]])
\r
570 + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][i]];
\r
573 cout<<"Random number generator error!"<<endl;
\r
583 void readPopProfile(string filename, unsigned int numPopulation){
\r
585 int popProfile_index=0;
\r
587 popProfile.clear();
\r
588 struct popProfilesStruct temp_popProfile;
\r
589 popProfile.push_back(temp_popProfile);
\r
596 popFile.open(filename.c_str());
\r
597 if(!popFile)error("Population profile: %s cannot be opened!",filename.c_str());
\r
599 getline(popFile,fileline); // the first line is the population sizes at generation 0
\r
601 term = strtok ((char*)fileline.c_str()," \t"); // the first term is the generation
\r
602 popProfile[popProfile_index].generation=atoi(term);
\r
604 if(popProfile[popProfile_index].generation!=0)error("The first line in population profile should be generation 0!");
\r
606 term = strtok (NULL, " \t"); // the second term and after are the sizes of that population
\r
608 popProfile[popProfile_index].popsize.push_back(atoi(term));
\r
609 term = strtok (NULL, " \t");
\r
612 popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0
\r
613 for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){
\r
614 popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());
\r
617 maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();
\r
619 if(popProfile[0].popsize.size()!=numPopulation)error("The number of populations specified by -pop is not consistent with generation 0 in population profile:%s",filename.c_str());
\r
621 // read in the population correspondence to the next population profile
\r
624 while(popFile.peek()!=EOF){
\r
625 getline(popFile,fileline); // the even number lines are the correspondence of populations of different generation
\r
626 popProfile[popProfile_index].popChange.resize(popProfile[popProfile_index].popsize.size());
\r
628 term = strtok ((char*)fileline.c_str(),"- \t"); // the FROM population
\r
631 term1 = strtok (NULL, "- \t"); // the TO population
\r
633 popProfile[popProfile_index].popChange[atoi(term)-1].push_back(atoi(term1)-1);
\r
635 term = strtok (NULL, "- \t");
\r
638 popProfile_index++;
\r
639 popProfile.push_back(temp_popProfile);
\r
641 if(popFile.peek()==EOF)error("Error in population profile: the last line should specify the population sizes.");
\r
643 // read the next population profile
\r
645 getline(popFile,fileline); // the population sizes
\r
647 term = strtok ((char*)fileline.c_str()," \t"); // the first term is the generation
\r
648 popProfile[popProfile_index].generation=atoi(term);
\r
650 term = strtok (NULL, " \t"); // the second term and after are the sizes of that population
\r
652 popProfile[popProfile_index].popsize.push_back(atoi(term));
\r
653 term = strtok (NULL, " \t");
\r
656 popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0
\r
657 for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){
\r
658 popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());
\r
661 if(maxN < popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back())
\r
662 maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();
\r
665 for(unsigned int i=0;i<popProfile.size();i++){
\r
666 popProfile[i].selectProb.resize(popProfile[i].popChange.size());
\r
667 for(unsigned int j=0;j<popProfile[i].popChange.size();j++){
\r
669 for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)sum += popProfile[i+1].popsize[popProfile[i].popChange[j][k]];
\r
671 popProfile[i].selectProb[j].push_back(((float)popProfile[i+1].popsize[popProfile[i].popChange[j][0]])/sum);
\r
672 for(unsigned int k=1;k<popProfile[i].popChange[j].size();k++)
\r
673 popProfile[i].selectProb[j].push_back(popProfile[i].selectProb[j].back()+popProfile[i+1].popsize[popProfile[i].popChange[j][k]]/sum);
\r
676 // check for consistence
\r
680 for(unsigned int i=0;i<popProfile.size()-1;i++){
\r
682 for(unsigned int j=0;j<popProfile[i].popChange.size();j++){
\r
683 if(popProfile[i].popChange[j].size()==0)error("One population does not have its parent population: profile file line %d, population %d\n",i+1,j+1);
\r
684 for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++){
\r
685 if(min>popProfile[i].popChange[j][k])min=popProfile[i].popChange[j][k];
\r
686 if(max<popProfile[i].popChange[j][k])max=popProfile[i].popChange[j][k];
\r
689 if(min!=0 || max!=(int)(popProfile[i+1].popsize.size()-1))
\r
690 error("The population changes does not consistent with the next population profile. line=%d, min=%d, max=%d, num POP in next line=%d\n",i+1,min+1,max+1,popProfile[i+1].popsize.size());
\r
696 void readRecombination(string filename, int pieces){
\r
698 ifstream recombFile;
\r
702 vector<float> recDistribution;
\r
703 vector<float> recRate;
\r
708 recombFile.open(filename.c_str());
\r
709 if(!recombFile)error("Recombination profile: %s cannot be opened!",filename.c_str());
\r
711 getline(recombFile,fileline); // the first line is head of distribution table
\r
712 term = strtok ((char*)fileline.c_str()," \t");
\r
713 term = strtok (NULL, " \t");
\r
715 if(strcmp(term,"Freq")==0 || strcmp(term,"freq")==0){
\r
716 while(recombFile.peek()!=EOF){
\r
717 getline(recombFile,fileline);
\r
719 term = strtok ((char*)fileline.c_str()," \t"); // the first number is the recombination rate
\r
720 recRate.push_back((float)(atof(term)));
\r
722 if(atof(term)<0 || atof(term)>1)error("Recombination rate should be between 0 and 1. input=%f",atof(term));
\r
724 term = strtok (NULL, " \t"); // the second number is the frequency of recombination rate
\r
725 recDistribution.push_back((float)(atof(term)));
\r
727 if(atof(term)<=0)error("Frequency should be positive. input=%f",atof(term));
\r
729 sum += (float)(atof(term));
\r
732 if(recRate.size()!=recDistribution.size() || recRate.size()==0)error("Errors in the recombination profile: %s",filename.c_str());
\r
734 // normalize the frequency
\r
735 for(unsigned int i=0;i<recDistribution.size();i++) recDistribution[i] /= sum;
\r
737 // compute the cumulative distribution
\r
738 for(unsigned int i=1;i<recDistribution.size();i++) recDistribution[i] += recDistribution[i-1];
\r
740 recombVector = new float [pieces-1];
\r
742 for(int i=0;i<pieces-1;i++){
\r
743 random = (float)(globalRandom.Next());
\r
744 for(unsigned int j=0;j<recDistribution.size();j++){
\r
745 if(random < recDistribution[j]){
\r
746 recombVector[i]=recRate[j];
\r
753 cout<<"*** RECOMBINATION RATES PROFILE ***"<<endl;
\r
754 cout<<"Recombination_Rate\tCumulative_frequency"<<endl;
\r
755 for(unsigned int i=0;i<recRate.size();i++){
\r
756 cout<<recRate[i]<<"\t\t\t"<<recDistribution[i]<<endl;
\r
760 else if(strcmp(term,"Pos")==0 || strcmp(term,"pos")==0){
\r
761 float currentRec=0.0,nextRec=0.0;
\r
764 if(recombFile.peek()!=EOF){
\r
765 getline(recombFile,fileline);
\r
766 term = strtok ((char*)fileline.c_str()," \t");
\r
767 currentRec=(float)(atof(term));
\r
769 if(recombFile.peek()!=EOF){
\r
770 getline(recombFile,fileline);
\r
771 term = strtok ((char*)fileline.c_str()," \t");
\r
772 nextRec=(float)(atof(term));
\r
773 term = strtok (NULL, " \t");
\r
774 nextPos=atoi(term);
\r
782 error("Recombination file does not contain recombination rate information.\n");
\r
785 recombVector = new float [pieces-1];
\r
787 for(int i=0;i<pieces-1;i++){
\r
789 currentRec=nextRec;
\r
790 if(recombFile.peek()!=EOF){
\r
791 getline(recombFile,fileline);
\r
792 term = strtok ((char*)fileline.c_str()," \t");
\r
793 nextRec=(float)(atof(term));
\r
794 term = strtok (NULL, " \t");
\r
795 nextPos=atoi(term);
\r
802 recombVector[i]=currentRec;
\r
809 error("The second column is not \"freq\" or \"pos\".\n");
\r
815 cout<<"The recombination rates between fragments are:"<<endl<<"\t";
\r
816 for(int i=0;i<pieces-1;i++)cout<<"("<<i+1<<") "<<recombVector[i]<<" ";
\r
817 cout<<"("<<pieces<<")"<<endl;
\r
818 cout<<"*** END OF RECOMBINATION RATES PROFILE ***"<<endl;
\r
824 map<int,string > tree;
\r
825 ostringstream temp;
\r
828 for(int i=0;i<L;i++){
\r
831 for(int j=0;j<n;j++){
\r
832 if(tree[genepool[j*L+i]]!=""){
\r
834 temp <<tree[genepool[j*L+i]]<<","<<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];
\r
835 tree[genepool[j*L+i]]= temp.str();
\r
839 temp <<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];
\r
840 tree[genepool[j*L+i]]= temp.str();
\r
845 for(int j=n*L;j<nextFree;j++){
\r
847 if(genepool[j]==0){
\r
848 cout<<"The tree for fragment "<<i+1<<"= ("<<tree[j]<<");"<<endl;
\r
851 if(tree[genepool[j]]!=""){
\r
853 temp <<tree[genepool[j]]<<",("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];
\r
854 tree[genepool[j]]= temp.str();
\r
858 temp <<"("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];
\r
859 tree[genepool[j]]=temp.str();
\r
872 void genome(string popSize, // effective population size of each population
\r
873 int nSubPOP, // number of populations
\r
874 vector<int> & nSubSample, // number of samples (haplotypes) draw from each populations
\r
875 int numPieces, // number of fragments for each sample (chromosome)
\r
876 int pieceLen, // length in base pair of each fragment
\r
877 int numIndepRegion, // number of independent regions (independent chromosome)
\r
878 int s, // fixed number of SNPs want to simulate, randomly place s SNPs on the genealogy, -1 means the number of mutation follows a Poisson distribution
\r
879 string rec, // recombination rate between consecutive fragments per generation
\r
880 double mut, // mutation rate per generation per base pair
\r
881 double mig, // migration rate per generation
\r
882 vector< vector<bool> > &return_chromosome, // the return chromosome by connecting all independent chromosome into one long chromosome
\r
883 long t, // random seed
\r
884 int drawtree, // drawtree=1 output the genealogy trees for each fragment and chromosome; drawtree=0 do not output the trees
\r
885 bool printparameters // =1 print out all input parameters, =0 do not print input parameters
\r
888 if(atoi(popSize.c_str())>0){ // if the population size is fixed during evoluation
\r
889 N = atoi(popSize.c_str())*nSubPOP;
\r
894 popProfile.clear();
\r
895 struct popProfilesStruct temp_popProfile;
\r
896 popProfile.push_back(temp_popProfile);
\r
898 popProfile[0].generation = 0;
\r
900 for(int i=0;i<nSubPOP;i++)popProfile[0].popsize.push_back(atoi(popSize.c_str()));
\r
901 for(int i=0;i<nSubPOP;i++)popProfile[0].popStart.push_back(i*atoi(popSize.c_str()));
\r
903 cout<<"*** POPULATION PROFILE ***"<<endl;
\r
904 cout<<"The size of each population is fixed to "<<atoi(popSize.c_str())<<endl;
\r
905 cout<<"Sizes of populations = ";
\r
906 for(unsigned int j=0;j<popProfile[0].popsize.size();j++)cout<<popProfile[0].popsize[j]<<" ";
\r
908 // cout<<"POP Start index = ";
\r
909 // for(int j=0;j<popProfile[0].popStart.size();j++)cout<<popProfile[0].popStart[j]<<" ";
\r
911 cout<<"*** END OF POPULATION PROFILE ***"<<endl<<endl;
\r
914 else{ // popSize stores the filename of the population profile during evoluation
\r
917 currentProfile = 0;
\r
919 readPopProfile(popSize,nSubPOP);
\r
921 N = popProfile[0].popStart.back()+popProfile[0].popsize.back();
\r
923 // print out the population profile information.
\r
925 cout<<"*** POPULATION PROFILE ***"<<endl;
\r
926 cout<<"Maximum total size of populations at all generations: N = "<<maxN<<endl;
\r
927 cout<<"Total size of populations at generation 0: N = "<<N<<endl<<endl;
\r
929 for(unsigned int i=0;i<popProfile.size();i++){
\r
930 cout<<"Generation = "<<popProfile[i].generation<<endl;
\r
931 cout<<"Sizes of populations= ";
\r
932 for(unsigned int j=0;j<popProfile[i].popsize.size();j++)cout<<popProfile[i].popsize[j]<<" ";
\r
934 // cout<<"POP Start index = ";
\r
935 // for(int j=0;j<popProfile[i].popStart.size();j++)cout<<popProfile[i].popStart[j]<<" ";
\r
937 if(popProfile[i].popChange.size()>0){
\r
938 cout<<"Populations change backward in time:"<<endl;
\r
939 for(unsigned int j=0;j<popProfile[i].popChange.size();j++){
\r
940 cout<<" From "<<j+1<<" to ";
\r
941 for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)cout<<popProfile[i].popChange[j][k]+1<<" ";
\r
945 // cout<<"Selection cumulative probability:"<<endl;
\r
946 // for(int j=0;j<popProfile[i].selectProb.size();j++){
\r
947 // cout<<" From "<<j+1<<" with prob = ";
\r
948 // for(int k=0;k<popProfile[i].selectProb[j].size();k++)cout<<popProfile[i].selectProb[j][k]<<" ";
\r
952 if(i==popProfile.size()-1)cout<<"*** END OF POPULATION PROFILE ***"<<endl;
\r
959 nSample.resize(nPOP);
\r
960 for(int i=0;i<nPOP;i++){
\r
961 if(popProfile[0].popsize[i]<nSubSample[i])error("Population size should be larger than sample size!");
\r
962 n += nSubSample[i];
\r
963 nSample[i]=nSubSample[i];
\r
968 numChr = numIndepRegion;
\r
971 if(atof(rec.c_str())>0){ // if the recombination rate is fixed
\r
972 recombination = atof(rec.c_str());
\r
974 else{ // the distribution of recombination rate is described in the file (filename stored in rec)
\r
975 recombination = -1;
\r
976 readRecombination(rec,L);
\r
984 if(t<=0)t=(long)(time(0)); // if t >0 we set the seed to t
\r
985 cout<<endl<<"random seed="<<t<<endl;
\r
986 globalRandom.Reset(t);
\r
988 if(printparameters){
\r
989 printf("popSize=%s, n=%d, nPOP=%d, L=%d, length=%d, numChr=%d, SNP=%d, rec=%s, mut=%.4e, mig=%.4e, drawtree=%d, printparameters=%d\n",popSize.c_str(),n,nPOP,L,length,numChr,SNP,rec.c_str(),mut,mig,drawtree,printparameters);
\r
990 cout<<"subSample = ";
\r
991 for(int i=0;i<nPOP;i++){
\r
992 cout<<nSample[i]<<" ";
\r
997 if(N<=0 || n<=0 || L<=0 || length <=0 || numChr <=0 || atof(rec.c_str()) <0 || mut <0 || mig<0){
\r
998 cout<<"Input parameters have to be positive."<<endl;
\r
999 printf("N=%d, n=%d, L=%d, length=%d, numChr=%d, recombination=%s, mutation=%lf, migration=%lf\n",N,n,L,length,numChr,rec.c_str(),mut,mig);
\r
1003 poolsize = 2 * n * L - L;
\r
1005 AllocateMemory(return_chromosome);
\r
1007 // cout<<"memory OK!"<<endl<<endl;
\r
1009 for(int chr=0;chr<numChr;chr++){
\r
1013 // cout<<"Initialization OK!"<<endl;
\r
1015 bool done = false;
\r
1016 int generation = 0, units = n * L;
\r
1020 // printf("\n\nGeneration %d -- Tracking %d units of sequence [pool used %.2f%%] %c",
\r
1021 // generation++, units, nextFree * 100. / poolsize,
\r
1022 // !debug ? '\r' : '\n');
\r
1026 if(!fixedPOP && currentProfile<popProfile.size()-1)
\r
1027 if(generation == popProfile[currentProfile+1].generation)noChange=false;
\r
1031 // printf("F : ");
\r
1032 // for (int i = 0; i < L; i++)
\r
1033 // printf("%3d ", fragments[i]);
\r
1037 // Position of the first segment allocated to the current generation
\r
1038 int generationStart = nextFree;
\r
1040 // For each individual in the population
\r
1041 for (int i = 0; i < N && !done; i++)
\r
1042 if (child_flags[i])
\r
1045 /*if (debug) printf("%2d : ", i);*/
\r
1047 // The position we are currently tracking and its distance
\r
1048 // from the previous position
\r
1049 int distance = 0, parent = -1;
\r
1050 int startPiece = 0;
\r
1053 for (int block = child_First[i]; block <= child_Last[i]; block++)
\r
1054 if (children[i][block] == NULL)
\r
1055 distance += BLOCKSIZE;
\r
1058 int pos = BLOCKSIZE * block;
\r
1060 int pos_bound = (L < BLOCKSIZE * (block + 1) ? L : BLOCKSIZE * (block + 1) );
\r
1062 while (pos < pos_bound )
\r
1065 pos_in_block = pos % BLOCKSIZE;
\r
1066 // Skip through uninformative positions along each chromosome
\r
1067 if (children[i][block][pos_in_block] == -1)
\r
1069 //if (debug) printf(" . \n");
\r
1075 // Pick an ancestor at random for the current piece of chromosome
\r
1077 if(recombination>0){ // if the recombination rate is fixed over the genome
\r
1078 recombProb = (float)(pow(1.0 - recombination, distance));
\r
1080 else{ // if the recombination rate is specified by the profile
\r
1082 for(int recV_iter=startPiece;recV_iter<pos;recV_iter++)recombProb *= (1-recombVector[recV_iter]);
\r
1086 if (parent == -1 ||
\r
1087 distance > 0 && globalRandom.Next() > recombProb){
\r
1088 parent = SampleParent(i, parent);
\r
1092 TouchParent(parent, pos);
\r
1095 // Reset distance between segments
\r
1099 //if (debug) printf("parent=%3d pos=%d address=%d address2=%d\n", parent,pos,parents[parent][block],parents[parent][pos/BLOCKSIZE]);
\r
1101 // Check if we sampled a new ancestral segment ...
\r
1102 if (parents[parent][block][pos_in_block] == -1)
\r
1104 // We delay sampling a new gene from the pool until we know
\r
1105 // a coalescent event occured ...
\r
1106 parents[parent][block][pos_in_block] = children[i][block][pos_in_block];
\r
1109 // Or if a coalescent event occured
\r
1112 // If we previously delayed sampling this parent
\r
1113 if (parents[parent][block][pos_in_block] < generationStart)
\r
1115 genetimes[nextFree] = generation;
\r
1116 genepool[parents[parent][block][pos_in_block]] = nextFree;
\r
1117 parents[parent][block][pos_in_block] = nextFree++;
\r
1119 if (nextFree == poolsize && units != 2)
\r
1120 error("Genepool exhausted\n");
\r
1123 genepool[children[i][block][pos_in_block]] = parents[parent][block][pos_in_block];
\r
1128 if (fragments[pos] == 1)
\r
1130 // Reached the MRCA for this fragment
\r
1131 MRCAtimes[pos] = generation;
\r
1132 MRCA[pos] = parents[parent][block][pos_in_block];
\r
1134 genepool[parents[parent][block][pos_in_block]]=0;
\r
1136 parents[parent][block][pos_in_block] = -1;
\r
1139 // One less segment to track
\r
1140 if (--activeSegments == 0)
\r
1157 cout<<endl<<"Genealogy trees for chromosome:"<<chr+1<<endl;
\r
1161 // assign mutations
\r
1163 cout<<"Simulate mutation for Chromosome "<<chr+1<<endl;
\r
1165 FreeCoalescentMemory();
\r
1167 cout<<"Coalescent Memory free"<<endl;
\r
1169 InitializeMutation(return_chromosome);
\r
1171 cout<<"Mutation process initialized"<<endl;
\r
1173 // cout<<"genepool:\t";
\r
1174 // for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";
\r
1177 // cout<<"genetimes:\t";
\r
1178 // for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";
\r
1181 // cout<<"nextFree="<<nextFree<<endl;
\r
1183 // cout<<"mutation:\t";
\r
1184 // for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";
\r
1188 for(int i=0;i<nextFree;i++){
\r
1189 if(genepool[i]!=0)branchMutation[i]=poissonInt((double)length*(double)(genetimes[genepool[i]]-genetimes[i])*mutation);
\r
1193 unsigned long * cumulativeCount = new unsigned long [nextFree];
\r
1195 if(genepool[0]!=0)cumulativeCount[0] = genetimes[genepool[0]]-genetimes[0];
\r
1196 else cumulativeCount[0] = 0;
\r
1198 for(int i=1;i<nextFree;i++){
\r
1199 if(genepool[i]!=0)cumulativeCount[i] = cumulativeCount[i-1] + genetimes[genepool[i]]-genetimes[i];
\r
1200 else cumulativeCount[i] = cumulativeCount[i-1];
\r
1203 unsigned long totalGeneration = cumulativeCount[nextFree - 1];
\r
1205 // cout<<"Total Generation = "<<totalGeneration<<endl;
\r
1207 // if((unsigned long)SNP>totalGeneration){
\r
1208 // cout<<"Required number of SNPs("<<SNP<<") is more than total number of generations("<<totalGeneration<<")!"<<endl;
\r
1211 for(int i=0;i<SNP;i++){
\r
1213 unsigned long randomLong = globalRandom.NextInt()%totalGeneration;
\r
1216 int end = nextFree - 1;
\r
1217 int middle = (begin+end)/2;
\r
1220 if(cumulativeCount[middle]<randomLong){
\r
1227 if(end == begin+1)break;
\r
1229 middle = (begin+end)/2;
\r
1232 if(cumulativeCount[end] < randomLong || (cumulativeCount[begin] >= randomLong && begin != 0)){
\r
1233 cout<<"searching error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;
\r
1234 error("see above message!\n");
\r
1239 if(cumulativeCount[begin]>=randomLong)sampleIndex = begin;
\r
1240 else sampleIndex = end;
\r
1242 if(genepool[sampleIndex]==0){
\r
1243 cout<<"genepool error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;
\r
1244 error("see above message!\n");
\r
1247 branchMutation[sampleIndex]++;
\r
1250 delete [] cumulativeCount;
\r
1254 delete [] genetimes;
\r
1256 cout<<"Mutation assigned"<<endl;
\r
1258 // cout<<"mutation:\t";
\r
1259 // for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";
\r
1262 AllocateMutationMemory();
\r
1264 cout<<"Mutation Memory allocated"<<endl;
\r
1266 PrepareMutation();
\r
1268 for(int i=0;i<n;i++){
\r
1269 for(int j=0;j<L;j++){
\r
1270 if(branchMutation[i*L+j]>0){
\r
1271 geneIndex[i*L+j] = fragmentLength[j];
\r
1272 fragmentLength[j] += branchMutation[i*L+j];
\r
1274 branchFragment[i*L+j] = j;
\r
1279 for(int i=0;i<nextFree;i++){
\r
1280 if(genepool[genepool[i]]!=0 && genepool[i]!=0){
\r
1281 if(branchMutation[genepool[i]]>0 && branchFragment[genepool[i]]== -1 ){
\r
1282 geneIndex[genepool[i]] = fragmentLength[branchFragment[i]];
\r
1283 fragmentLength[branchFragment[i]] += branchMutation[genepool[i]];
\r
1285 branchFragment[genepool[i]]=branchFragment[i];
\r
1289 //cout<<"branchfragment done"<<endl;
\r
1291 // cout<<"branchFragment:\t";
\r
1292 // for(int i=0;i<nextFree;i++)cout<<branchFragment[i]<<" ";
\r
1295 // cout<<"geneIndex:\t";
\r
1296 // for(int i=0;i<nextFree;i++)cout<<geneIndex[i]<<" ";
\r
1299 // cout<<"fragmentLength:\t";
\r
1300 // for(int i=0;i<L;i++)cout<<fragmentLength[i]<<" ";
\r
1304 // cout<<"genepool genetime mutation frag index\n";
\r
1305 // for(int i=0;i<nextFree;i++)cout<<genepool[i]<<" "<<genetimes[i]<<" "<<branchMutation[i]<<" "<<branchFragment[i]<<" "<<geneIndex[i]<<endl;
\r
1310 delete [] branchFragment;
\r
1312 cout<<"Internal memory free"<<endl;
\r
1314 // print the fragment index for each SNP
\r
1315 cout<<"SNP genetic position scaled to [0,1]:"<<endl;
\r
1317 for(int i=0;i<L;i++){
\r
1318 for(int j=0;j<fragmentLength[i];j++)cout<<(float)i/(float)(L-1)<<" ";
\r
1322 cout<<"Only one fragment to be simulated. No recombination between SNPs.";
\r
1326 int totalLength=0;
\r
1328 for(int i=0;i<L;i++)totalLength += fragmentLength[i];
\r
1332 for(int i=0;i<n;i++){
\r
1333 for(int j=0;j<L;j++){
\r
1334 chromosome[i][j].resize(fragmentLength[j],0);
\r
1336 for(int k=0;k<branchMutation[i*L+j];k++)chromosome[i][j][geneIndex[i*L+j]+k] = 1;
\r
1338 int current=i*L+j;
\r
1340 while(genepool[genepool[current]]!=0){
\r
1341 for(int k=0;k<branchMutation[genepool[current]];k++)chromosome[i][j][geneIndex[genepool[current]]+k]=1;
\r
1342 current=genepool[current];
\r
1347 cout<<"Chromosome done"<<endl;
\r
1349 FreeMutationMemory();
\r
1352 for(int i=0;i<n;i++){
\r
1353 return_chromosome[i].reserve(return_chromosome[i].size()+totalLength);
\r
1357 for(int i=0;i<n;i++){
\r
1358 for(int j=0;j<L;j++){
\r
1359 return_chromosome[i].insert(return_chromosome[i].end(),chromosome[i][j].begin(),chromosome[i][j].end());
\r
1363 cout<<"Return chromosome done"<<endl;
\r
1365 // print the chromosomes
\r
1366 // for(int i=0;i<n;i++){
\r
1367 // printf("Chr %d: ",i);
\r
1368 // for(int j=0;j<L;j++){
\r
1369 // for(int k=0;k<chromosome[i][j].size();k++)cout<<chromosome[i][j][k];
\r
1376 // for(int i=0;i<n;i++){
\r
1377 // printf("return Chr %d: ",i);
\r
1378 // for(int j=0;j<return_chromosome[i].size();j++){
\r
1379 // cout<<return_chromosome[i][j];
\r
1390 printf("\nDone simulating ARG ...\n");
\r