--- /dev/null
+////////////////////////////////////////////////////////////////////// \r
+// Error.cpp \r
+//////////////////////////////////////////////////////////////////////////////\r
+// COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions\r
+// are met:\r
+//\r
+// 1. Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// 2. Redistributions in binary form must reproduce the above copyright\r
+// notice, this list of conditions and the following disclaimer in the\r
+// documentation and/or other materials provided with the distribution.\r
+//\r
+// 3. The names of its contributors may not be used to endorse or promote\r
+// products derived from this software without specific prior written\r
+// permission.\r
+//\r
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+#include "Error.h"\r
+\r
+#include "stdlib.h"\r
+#include "stdarg.h"\r
+#include "stdio.h"\r
+\r
+// Declare a dummy class to ensure that compilers recognize this as C++ code\r
+class String;\r
+\r
+void error ( const char * msg, ... )\r
+ {\r
+ va_list ap;\r
+\r
+ va_start(ap, msg);\r
+\r
+ printf("\nFATAL ERROR - \n");\r
+ vprintf(msg, ap);\r
+ printf("\n\n");\r
+\r
+ va_end(ap);\r
+\r
+ exit(EXIT_FAILURE);\r
+ }\r
+\r
+void warning ( const char * msg, ... )\r
+ {\r
+ va_list ap;\r
+\r
+ va_start(ap, msg);\r
+\r
+ printf("\n\aWARNING - \n");\r
+ vprintf(msg, ap);\r
+ printf("\n");\r
+\r
+ va_end(ap);\r
+ }\r
+\r
+void numerror ( const char * msg , ... )\r
+ {\r
+ va_list ap;\r
+\r
+ va_start(ap, msg);\r
+\r
+ printf("\nFATAL NUMERIC ERROR - ");\r
+ vprintf(msg, ap);\r
+ printf("\n\n");\r
+\r
+ va_end(ap);\r
+\r
+ exit(EXIT_FAILURE);\r
+ }\r
--- /dev/null
+////////////////////////////////////////////////////////////////////// \r
+// Error.h\r
+//////////////////////////////////////////////////////////////////////////////\r
+// COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions\r
+// are met:\r
+//\r
+// 1. Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// 2. Redistributions in binary form must reproduce the above copyright\r
+// notice, this list of conditions and the following disclaimer in the\r
+// documentation and/or other materials provided with the distribution.\r
+//\r
+// 3. The names of its contributors may not be used to endorse or promote\r
+// products derived from this software without specific prior written\r
+// permission.\r
+//\r
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#ifndef _ERROR_H_\r
+#define _ERROR_H_\r
+\r
+// #ifdef __cplusplus\r
+// extern "C" {\r
+// #endif\r
+\r
+void error(const char * msg, ...);\r
+void warning(const char * msg, ...);\r
+void numerror(const char * msg, ...);\r
+\r
+// #ifdef __cplusplus\r
+// };\r
+// #endif\r
+\r
+\r
+#endif\r
--- /dev/null
+\r
+COPYRIGHT NOTICE FOR GENOME\r
+=====================================\r
+\r
+ GENOME coded by Liming Liang and Goncalo Abecasis. \r
+\r
+ Copyright (C) 2006 - 2007, Liming Liang and Goncalo Abecasis,\r
+ All rights reserved. \r
+\r
+ Redistribution and use in source and binary forms, with or without\r
+ modification, are permitted provided that the following conditions\r
+ are met:\r
+\r
+ 1. Redistributions of source code must retain the above copyright\r
+ notice, this list of conditions and the following disclaimer.\r
+\r
+ 2. Redistributions in binary form must reproduce the above copyright\r
+ notice, this list of conditions and the following disclaimer in the\r
+ documentation and/or other materials provided with the distribution.\r
+\r
+ 3. The names of its contributors may not be used to endorse or promote \r
+ products derived from this software without specific prior written \r
+ permission.\r
+\r
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+\r
--- /dev/null
+Mersenne twister code is included in the file Random.cpp
+
+COPYRIGHT NOTICE FOR MERSENNE TWISTER
+=====================================
+
+ Mersenne twister coded by Takuji Nishimura and Makoto Matsumoto.
+
+ Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. The names of its contributors may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
--- /dev/null
+////////////////////////////////////////////////////////////////////// \r
+// Main.cpp \r
+//////////////////////////////////////////////////////////////////////////////\r
+// COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions\r
+// are met:\r
+//\r
+// 1. Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// 2. Redistributions in binary form must reproduce the above copyright\r
+// notice, this list of conditions and the following disclaimer in the\r
+// documentation and/or other materials provided with the distribution.\r
+//\r
+// 3. The names of its contributors may not be used to endorse or promote\r
+// products derived from this software without specific prior written\r
+// permission.\r
+//\r
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#include <iostream>\r
+#include <math.h>\r
+#include <vector> \r
+#include <time.h>\r
+#include "genome.h"\r
+#include "Random.h"\r
+#include <stdio.h> \r
+#include <iomanip>\r
+ \r
+using namespace std;\r
+\r
+\r
+int main(int argc, char ** argv)\r
+ {\r
+ string popSize("10000"), rec("0.0001");\r
+ \r
+ int numPieces=100, pieceLen=10000, numIndepRegion=1, SNP=-1;\r
+ int nSubPOP=2; \r
+ vector<int> nSubSample(nSubPOP);\r
+ for(int i=0;i<nSubPOP;i++)nSubSample[i]=10;\r
+ \r
+ double mut=1e-8, mig=2.5e-4;\r
+ double maf=0.0,prop=1.0;\r
+ \r
+ vector< vector<bool> > data;\r
+\r
+ long seed=-1;\r
+ \r
+ int drawtree=0;\r
+\r
+ if(argc==1){\r
+ cout << endl << "GENOME-0.2: Whole Genome Coalescent Simulator. (2009.2) Liming Liang, Goncalo Abecasis" << endl << endl;\r
+ cout << " Parameters and default values:" << endl;\r
+ cout << " -pop " << "number of subpopulations and size of subsamples [ " <<nSubPOP<<" "; \r
+ for(int i=0;i<nSubPOP;i++)cout<<nSubSample[i]<<" ";cout<<"]"<< endl;\r
+ cout << " -N " << "effective size of each subpopulation or the filename for population profile [" <<popSize.c_str()<<"]"<< endl;\r
+ cout << " -c " << "number of independent regions (chromosomes) to simulate [" <<numIndepRegion<<"]"<< endl;\r
+ cout << " -pieces " << "number of fragments per independent region [" <<numPieces<<"]"<< endl;\r
+ cout << " -len " << "length in base of each fragment [" <<pieceLen<<"]"<< endl;\r
+ cout << " -s " << "fixed number of SNPs per independent region (chromosome), -1 = number of SNPs follows Poisson distribution [" <<SNP<<"]"<< endl;\r
+ cout << " -rec " << "recombination rate bewteen consecutive fragments"<<" or the filename for recombination rate distribution ["<<rec.c_str()<<"]"<< endl;\r
+ cout << " -mut " << "mutation rate per generation per base pair [" <<mut<<"]"<< endl;\r
+ cout << " -mig " << "migration rate per generation per individual [" <<mig<<"]"<< endl;\r
+ cout << " -seed " << "random seed, -1 = use time as the seed [" <<seed<<"]"<< endl;\r
+ cout << " -tree " << "1=draw the genealogy trees, 0=do not output [" <<drawtree<<"]"<< endl;\r
+ cout << " -maf " << "Output SNPs with minor allele frequency greater than [" <<maf<<"]"<< endl;\r
+ cout << " -prop " << "To keep this proportion of SNPs with MAF < the value of -maf parameter [" <<prop<<"]"<< endl;\r
+ exit(0);\r
+ }\r
+ \r
+\r
+ int k=0;\r
+ for(int i=1;i<argc;i++){\r
+ if(strcmp(argv[i],"-pop")==0){ \r
+ nSubPOP=atoi(argv[++i]); k++;\r
+ nSubSample.clear();\r
+ nSubSample.resize(nSubPOP);\r
+ for(int iter=0;iter<nSubPOP;iter++)nSubSample[iter]=atoi(argv[++i]);\r
+ }\r
+ else if(strcmp(argv[i],"-N")==0){ popSize.assign(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-c")==0){ numIndepRegion=atoi(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-pieces")==0){ numPieces=atoi(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-len")==0){ pieceLen=atoi(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-s")==0){ SNP=atoi(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-rec")==0){ rec.assign(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-mut")==0){ mut=atof(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-mig")==0){ mig=atof(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-seed")==0){ seed=atol(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-tree")==0){ drawtree=atoi(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-maf")==0){ maf=atof(argv[++i]); k++;}\r
+ else if(strcmp(argv[i],"-prop")==0){ prop=atof(argv[++i]); k++;}\r
+ }\r
+ \r
+ if(k!=(argc-1)/2 && k!=(argc-1-nSubPOP)/2){\r
+ cout << "Parameters do not match! " << argc<< " " << k << endl; \r
+ exit(1);\r
+ } \r
+\r
+ cout << endl \r
+ << "GENOME-0.2: Whole Genome Coalescent Simulator. (2009.2) Liming Liang, Goncalo Abecasis" << endl << endl;\r
+ cout << " Parameters and effective values:" << endl;\r
+ cout << " -pop " << "number of subpopulations and size of subsamples [ " <<nSubPOP<<" "; \r
+ for(int i=0;i<nSubPOP;i++)cout<<nSubSample[i]<<" ";cout<<"]"<< endl;\r
+ cout << " -N " << "effective size of each subpopulation or the filename for population profile [" <<popSize.c_str()<<"]"<< endl;\r
+ cout << " -c " << "number of independent regions (chromosomes) to simulate [" <<numIndepRegion<<"]"<< endl;\r
+ cout << " -pieces " << "number of fragments per independent region [" <<numPieces<<"]"<< endl;\r
+ cout << " -len " << "length in base of each fragment [" <<pieceLen<<"]"<< endl;\r
+ cout << " -s " << "fixed number of SNPs per independent region (chromosome), -1 = number of SNPs follows Poisson distribution [" <<SNP<<"]"<< endl;\r
+ cout << " -rec " << "recombination rate bewteen consecutive fragments or the filename for recombination rate distribution ["<<rec.c_str()<<"]"<< endl;\r
+ cout << " -mut " << "mutation rate per generation per base pair [" <<mut<<"]"<< endl;\r
+ cout << " -mig " << "migration rate per generation per individual [" <<mig<<"]"<< endl;\r
+ cout << " -seed " << "random seed, -1 = use time as the seed [" <<seed<<"]"<< endl;\r
+ cout << " -tree " << "1=draw the genealogy trees, 0=do not output [" <<drawtree<<"]"<< endl;\r
+ cout << " -maf " << "Output SNPs with minor allele frequency greater than [" <<maf<<"]"<< endl;\r
+ cout << " -prop " << "To keep this proportion of SNPs with MAF < the value of -maf parameter [" <<prop<<"]"<< endl<<endl;\r
+\r
+ if(atoi(popSize.c_str())>0 && atof(rec.c_str())>0){ \r
+ printf(" Scaled recombination rate = %.4e\n", 2 * atoi(popSize.c_str()) * (numPieces - 1) * atof(rec.c_str()));\r
+ printf(" Scaled migration rate = %.4e\n", 2 * atoi(popSize.c_str()) * mig);\r
+ printf(" Scaled mutation rate = %.4e\n\n", 2 * atoi(popSize.c_str()) * mut * numPieces * pieceLen);\r
+ }\r
+ \r
+ genome(popSize, nSubPOP, nSubSample, numPieces, pieceLen, numIndepRegion, SNP, rec, mut, mig, data, seed,drawtree);\r
+ \r
+// output format for haploview\r
+\r
+// for(int i=0;i<data.size();i++){\r
+// printf("FAMILY1 MEMBER%d ",i/2+1);\r
+// for(int j=0;j<data[i].size();j++){\r
+// cout<<data[i][j]+1<<" ";\r
+// }\r
+// cout<<endl;\r
+// }\r
+\r
+\r
+// 0 = ancestral state, 1 = mutated state\r
+\r
+ cout<<"Total number of SNPs = "<<data[0].size()<<endl;\r
+ cout<<"Samples:"<<endl;\r
+ \r
+ if(maf>0.0 && prop < 1.0){ // filter out the SNPs with MAF < maf \r
+ \r
+ cout<<"Rare SNPs filtered according to parameters."<<endl;\r
+ \r
+ int n=0;\r
+ for(int k=0;k<nSubPOP;k++)n+=nSubSample[k];\r
+ \r
+ int m=(int)(data[0].size());\r
+ vector<bool> keep(m,true);\r
+ \r
+ float p;\r
+ for(int i=0;i<m;i++){\r
+ p=0.0;\r
+ for(int j=0;j<n;j++){\r
+ p += data[j][i]; \r
+ }\r
+ p /= n;\r
+ \r
+ if( (p<maf || (1-p)<maf) && globalRandom.Next()>prop)keep[i]=0;\r
+ }\r
+ \r
+\r
+ int index=0;\r
+ for(int k=0;k<nSubPOP;k++){\r
+ for(int i=0;i<nSubSample[k];i++){\r
+ cout<<"POP"<<k+1<<": ";\r
+ for(unsigned int j=0;j<data[index].size();j++){\r
+ if(keep[j])cout<<data[index][j];\r
+ }\r
+ cout<<endl;\r
+ index++;\r
+ } \r
+ }\r
+ \r
+ }\r
+ else{\r
+ int index=0;\r
+ for(int k=0;k<nSubPOP;k++){\r
+ for(int i=0;i<nSubSample[k];i++){\r
+ cout<<"POP"<<k+1<<": ";\r
+ for(unsigned int j=0;j<data[index].size();j++){\r
+ cout<<data[index][j];\r
+ }\r
+ cout<<endl;\r
+ index++;\r
+ } \r
+ }\r
+ }\r
+\r
+ \r
+}\r
+\r
+\r
--- /dev/null
+////////////////////////////////////////////////////////////////////// \r
+// MathConstant.h \r
+//////////////////////////////////////////////////////////////////////////////\r
+// COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions\r
+// are met:\r
+//\r
+// 1. Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// 2. Redistributions in binary form must reproduce the above copyright\r
+// notice, this list of conditions and the following disclaimer in the\r
+// documentation and/or other materials provided with the distribution.\r
+//\r
+// 3. The names of its contributors may not be used to endorse or promote\r
+// products derived from this software without specific prior written\r
+// permission.\r
+//\r
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+\r
+#ifndef __MATHCONSTANT_H__\r
+#define __MATHCONSTANT_H__\r
+\r
+#ifdef _MSC_VER\r
+#define _USE_MATH_DEFINES \r
+#endif\r
+\r
+#include "math.h"\r
+#include "stdlib.h"\r
+\r
+// Constants for numerical routines\r
+//\r
+\r
+#define TINY 1.0e-30 // A small number\r
+#define ITMAX 200 // Maximum number of iterations\r
+#define EPS 3.0e-7 // Relative accuracy\r
+#define ZEPS 3.0e-10 // Precision around zero\r
+#define FPMIN 1.0e-30 // Number near the smallest representable number\r
+#define FPMAX 1.0e+100 // Number near the largest representable number\r
+#define TOL 1.0e-6 // Zero SVD values below this\r
+#define GOLD 0.61803399 // Golden ratio\r
+#define CGOLD 0.38196601 // Complement of golden ratio\r
+\r
+inline double square(double a) { return a * a; }\r
+inline double sign(double a, double b) { return b >= 0 ? fabs(a) : -fabs(a); }\r
+inline double min(double a, double b) { return a < b ? a : b; }\r
+inline double max(double a, double b) { return a > b ? a : b; }\r
+\r
+inline int square(int a) { return a * a; }\r
+inline int sign(int a, int b) { return b >= 0 ? abs(a) : -abs(a); }\r
+inline int min(int a, int b) { return a < b ? a : b; }\r
+inline int max(int a, int b) { return a > b ? a : b; }\r
+\r
+#endif\r
--- /dev/null
+
+//////////////////////////////////////////////////////////////////////////////
+// This file includes code derived from the original Mersenne Twister Code
+// by Makoto Matsumoto and Takuji Nishimura
+// and is subject to their original copyright notice copied below:
+//////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////
+// COPYRIGHT NOTICE FOR MERSENNE TWISTER CODE
+// Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. The names of its contributors may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "Random.h"
+#include "MathConstant.h"
+#include "Error.h"
+
+#include <math.h>
+
+//Constants used internally by Mersenne random number generator
+#define MERSENNE_N 624
+#define MERSENNE_M 397
+
+// constant vector a
+#define MATRIX_A 0x9908b0dfUL
+
+// most significant w-r bits
+#define UPPER_MASK 0x80000000UL
+
+// least significant r bits
+#define LOWER_MASK 0x7fffffffUL
+
+
+// Constants used internally by Park-Miller random generator
+#define IA 16807
+#define IM 2147483647
+#define AM (1.0 / IM)
+#define IQ 127773
+#define IR 2836
+#define NTAB 32
+#define NDIV (1+(IM-1)/NTAB)
+#define RNMX (1.0-EPS)
+
+Random::Random(long s)
+ {
+#ifndef __NO_MERSENNE
+ mt = new unsigned long [MERSENNE_N];
+ mti = MERSENNE_N + 1;
+ mersenneMult = 1.0/4294967296.0;
+#else
+ shuffler = new long [NTAB];
+#endif
+ Reset(s);
+ }
+
+Random::~Random()
+ {
+#ifndef __NO_MERSENNE
+ delete [] mt;
+#else
+ delete [] shuffler;
+#endif
+ }
+
+void Random::Reset(long s)
+ {
+ normSaved = 0;
+
+#ifndef __NO_MERSENNE
+ InitMersenne(s);
+#else
+ // 'Continuous' Random Generator
+ if ((seed = s) < 1)
+ seed = s == 0 ? 1 : -s; // seed == 0 would be disastrous
+
+ for (int j=NTAB+7; j>=0; j--) // Warm up and set shuffle table
+ {
+ long k = seed / IQ;
+ seed = IA * (seed - k * IQ) - IR * k;
+ if (seed < 0) seed += IM;
+ if (j < NTAB) shuffler[j] = seed;
+ }
+ last=shuffler[0];
+#endif
+ }
+
+// initializes mt[MERSENNE_N] with a seed
+void Random::InitMersenne(unsigned long s)
+ {
+ mt[0]= s & 0xffffffffUL;
+ for (mti = 1; mti < MERSENNE_N; mti++)
+ {
+ mt[mti] = (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
+ /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+ /* In the previous versions, MSBs of the seed affect */
+ /* only MSBs of the array mt[]. */
+ /* 2002/01/09 modified by Makoto Matsumoto */
+
+ mt[mti] &= 0xffffffffUL;
+ }
+ }
+
+int Random::Binary()
+ {
+ return Next() > 0.5 ? 1 : 0;
+ }
+
+#ifndef __NO_MERSENNE
+
+double Random::Next()
+ {
+ unsigned long y;
+
+ // mag01[x] = x * MATRIX_A for x=0,1
+ static unsigned long mag01[2]={0x0UL, MATRIX_A};
+
+ if (mti >= MERSENNE_N)
+ {
+ /* generate MERSENNE_N words at one time */
+ int kk;
+
+ // If InitMersenne() has not been called, a default initial seed is used
+ if (mti == MERSENNE_N+1)
+ InitMersenne(5489UL);
+
+ for (kk=0; kk < MERSENNE_N-MERSENNE_M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk+1] & LOWER_MASK);
+ mt[kk] = mt[kk+MERSENNE_M] ^ (y >> 1) ^ mag01[y & 0x1UL];
+ }
+
+ for (; kk < MERSENNE_N-1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk+1] & LOWER_MASK);
+ mt[kk] = mt[kk+(MERSENNE_M - MERSENNE_N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+ }
+
+ y = (mt[MERSENNE_N-1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[MERSENNE_N-1] = mt[MERSENNE_M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+
+ // Tempering
+ y ^= (y >> 11);
+ y ^= (y << 7) & 0x9d2c5680UL;
+ y ^= (y << 15) & 0xefc60000UL;
+ y ^= (y >> 18);
+
+ return (mersenneMult * ((double) y + 0.5));
+ }
+
+// Generates a random number on [0,0xffffffff]-interval
+
+unsigned long Random::NextInt()
+ {
+ unsigned long y;
+
+ // mag01[x] = x * MATRIX_A for x=0,1
+ static unsigned long mag01[2]={0x0UL, MATRIX_A};
+
+ if (mti >= MERSENNE_N)
+ {
+ /* generate MERSENNE_N words at one time */
+ int kk;
+
+ // If InitMersenne() has not been called, a default initial seed is used
+ if (mti == MERSENNE_N + 1)
+ InitMersenne(5489UL);
+
+ for (kk= 0; kk < MERSENNE_N - MERSENNE_M; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk+1] & LOWER_MASK);
+ mt[kk] = mt[kk+MERSENNE_M] ^ (y >> 1) ^ mag01[y & 0x1UL];
+ }
+
+ for (; kk< MERSENNE_N-1; kk++)
+ {
+ y = (mt[kk] & UPPER_MASK) | (mt[kk+1] & LOWER_MASK);
+ mt[kk] = mt[kk+(MERSENNE_M - MERSENNE_N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+ }
+
+ y = (mt[MERSENNE_N-1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+ mt[MERSENNE_N-1] = mt[MERSENNE_M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+
+ // Tempering
+ y ^= (y >> 11);
+ y ^= (y << 7) & 0x9d2c5680UL;
+ y ^= (y << 15) & 0xefc60000UL;
+ y ^= (y >> 18);
+
+ return y;
+ }
+
+#else
+
+double Random::Next()
+ {
+ // Compute seed = (IA * seed) % IM without overflows
+ // by Schrage's method
+ long k = seed / IQ;
+ seed = IA * (seed - k * IQ) - IR * k;
+ if (seed < 0) seed += IM;
+
+ // Map to 0..NTAB-1
+ int j = last/NDIV;
+
+ // Output value is shuffler[j], which is in turn replaced by seed
+ last = shuffler[j];
+ shuffler[j] = seed;
+
+ // Map to 0.0 .. 1.0 excluding endpoints
+ double temp = AM * last;
+ if (temp > RNMX) return RNMX;
+ return temp;
+ }
+
+unsigned long Random::NextInt()
+ {
+ // Compute seed = (IA * seed) % IM without overflows
+ // by Schrage's method
+ long k = seed / IQ;
+ seed = IA * (seed - k * IQ) - IR * k;
+ if (seed < 0) seed += IM;
+
+ // Map to 0..NTAB-1
+ int j = last/NDIV;
+
+ // Output value is shuffler[j], which is in turn replaced by seed
+ last = shuffler[j];
+ shuffler[j] = seed;
+
+ return last;
+ }
+
+#endif
+
+double Random::Normal()
+ {
+ double v1, v2, fac, rsq;
+
+ if (!normSaved) // Do we need new numbers?
+ {
+ do {
+ v1 = 2.0 * Next() - 1.0; // Pick two coordinates from
+ v2 = 2.0 * Next() - 1.0; // -1 to +1 and check if they
+ rsq = v1*v1 + v2*v2; // are in unit circle...
+ } while (rsq >= 1.0 || rsq == 0.0);
+
+ fac = sqrt(-2.0 * log(rsq)/rsq); // Apply the Box-Muller
+ normStore = v1 * fac; // transformation and save
+ normSaved = 1; // one deviate for next time
+ return v2 * fac;
+ }
+ else
+ {
+ normSaved = 0;
+ return normStore;
+ }
+ }
+
+void Random::Choose(int * array, int n, int k)
+ {
+ int choices = 1, others = 0;
+
+ if (k > n / 2)
+ {
+ choices = 0;
+ others = 1;
+ k = n - k;
+ }
+
+ for (int i = 0; i < n; i++)
+ array[i] = others;
+
+ while (k > 0)
+ {
+ int i = NextInt() % n;
+
+ if (array[i] == choices) continue;
+
+ array[i] = choices;
+ k--;
+ }
+ }
+
+void Random::Choose(int * array, float * weights, int n, int k)
+ {
+ int choices = 1, others = 0;
+
+ if (k > n / 2)
+ {
+ choices = 0;
+ others = 1;
+ k = n - k;
+ }
+
+ // First calculate cumulative sums of weights ...
+ float * cumulative = new float [n + 1];
+
+ cumulative[0] = 0;
+ for (int i = 1; i <= n; i++)
+ cumulative[i] = cumulative[i - 1] + weights[i - 1];
+
+ float & sum = cumulative[n], reject = 0.0;
+
+ for (int i = 0; i < n; i++)
+ array[i] = others;
+
+ while (k > 0)
+ {
+ float weight = (float)(Next() * sum);
+
+ int hi = n, lo = 0, i = 0;
+
+ while (hi >= lo)
+ {
+ i = (hi + lo) / 2;
+
+ if (cumulative[i + 1] <= weight)
+ lo = i + 1;
+ else if (cumulative[i] >= weight)
+ hi = i - 1;
+ else break;
+ }
+
+ if (array[i] == choices) continue;
+
+ array[i] = choices;
+ reject += weights[i];
+
+ // After selecting a substantial number of elements, update the cumulative
+ // distribution -- to ensure that at least half of our samples produce a hit
+ if (reject > sum * 0.50)
+ {
+ cumulative[0] = 0;
+ for (int i = 1; i <= n; i++)
+ if (array[i] != choices)
+ cumulative[i] = cumulative[i - 1] + weights[i - 1];
+ else
+ cumulative[i] = cumulative[i - 1];
+
+ reject = 0.0;
+ }
+
+ k--;
+ }
+ }
+
+Random globalRandom;
+
+
--- /dev/null
+
+//////////////////////////////////////////////////////////////////////////////
+// This file includes code derived from the original Mersenne Twister Code
+// by Makoto Matsumoto and Takuji Nishimura
+// and is subject to their original copyright notice copied below:
+//////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////
+// COPYRIGHT NOTICE FOR MERSENNE TWISTER CODE
+//
+// Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. The names of its contributors may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef __RANDOM_H__
+#define __RANDOM_H__
+
+// Define a quick and dirty generator
+#define RANDMUL 1664525L
+#define RANDADD 1013904223L
+
+#define RAND(seed) ((seed = seed * RANDMUL + RANDADD) & 0xFFFFFFFF)
+
+class Random
+// Implements the Mersenne Twister as default random number generator.
+// Compilation flag __NO_MERSENNE sets default generator to
+// a minimal Park-Miller with Bays-Durham shuffle and added safe guards.
+ {
+ protected:
+ // values for "minimal random values"
+ long seed;
+ long last;
+ long * shuffler;
+
+ // and for normal deviates
+ int normSaved;
+ double normStore;
+
+ double mersenneMult;
+
+ // Array for Mersenne state vector
+ unsigned long * mt;
+
+ // Used to signal that Mersenne state vector is not initialized
+ int mti;
+
+
+ public:
+
+ Random(long s = 0x7654321);
+ ~Random();
+
+ // Next bit in series of 0s and 1s
+ int Binary(); // Next bit in series of 0s and 1s
+
+ // Next value in series, between 0 and 1
+ double Next();
+
+ // Next integer
+ unsigned long NextInt();
+
+ // Random number form N(0,1)
+ double Normal();
+
+ void Reset(long s);
+ void InitMersenne(unsigned long s);
+
+ // Random number between 0 and 1
+ operator double()
+ { return Next(); }
+
+ // Random number between arbitrary bounds
+ double Uniform(double lo = 0.0, double hi = 1.0)
+ {
+ return lo + (hi - lo) * Next();
+ }
+
+ void Choose(int * array, int n, int k);
+ void Choose(int * array, float * weights, int n, int k);
+
+ };
+
+extern Random globalRandom;
+
+#endif
+
--- /dev/null
+////////////////////////////////////////////////////////////////////// \r
+// genome.cpp\r
+//////////////////////////////////////////////////////////////////////////////\r
+// COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// Version 0.2\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions\r
+// are met:\r
+//\r
+// 1. Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// 2. Redistributions in binary form must reproduce the above copyright\r
+// notice, this list of conditions and the following disclaimer in the\r
+// documentation and/or other materials provided with the distribution.\r
+//\r
+// 3. The names of its contributors may not be used to endorse or promote\r
+// products derived from this software without specific prior written\r
+// permission.\r
+//\r
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#define _CRT_SECURE_NO_DEPRECATE \r
+\r
+#include "Random.h"\r
+#include "Error.h"\r
+#include "stochastic.h" \r
+#include <iostream>\r
+#include <fstream>\r
+#include <math.h>\r
+#include <vector> \r
+#include <time.h>\r
+#include <algorithm>\r
+#include <map>\r
+#include <sstream>\r
+#include <stdio.h>\r
+\r
+using namespace std;\r
+\r
+//static bool debug = false;\r
+static bool fixedPOP = true;\r
+static bool noChange = true;\r
+\r
+\r
+static int N = 10000, n = 100, L = 1, poolsize = 1024 * 1024 * 64, length=10000, numChr=1, SNP=-1; // length = number of base per fragment\r
+static int maxN;\r
+static unsigned int currentProfile;\r
+static int nPOP=1;\r
+static vector<int> nSample(nPOP,3);\r
+static double recombination = 1e-8, migration = 0.0, mutation=1e-8;\r
+\r
+static int * genepool, * genetimes, * fragments, * MRCA, * MRCAtimes;\r
+static int *** children, *** parents, * cblocks, * pblocks;\r
+static int * parent_First, * parent_Last, * child_First, * child_Last;\r
+static bool * child_flags, * parent_flags;\r
+static int nextFree = 0, activeSegments = 0, pblockcounter = 0;\r
+static int numBLOCKs;\r
+static int BUFFER=1500;\r
+static int INCREMENT=1000;\r
+\r
+#define BLOCKSIZE 128\r
+#define SWAP(tmp,a,b) tmp=a; a=b; b=tmp;\r
+\r
+static vector< vector< vector<bool> > > chromosome;\r
+static int * fragmentLength;\r
+static int * branchFragment;\r
+static int * geneIndex;\r
+static int * branchMutation;\r
+\r
+static float * recombVector=NULL;\r
+\r
+struct popProfilesStruct{\r
+ int generation;\r
+ vector<int> popsize;\r
+ vector<int> popStart;\r
+ vector< vector<float> > selectProb;\r
+ vector< vector<int> > popChange;\r
+};\r
+\r
+static vector<struct popProfilesStruct> popProfile;\r
+\r
+void print(){\r
+ cout<<"\n*****************\nParents:\t";\r
+ for(int i=0;i<N;i++){\r
+ if(parent_flags[i]){\r
+ for(int j=0;j<L;j++) cout<<"["<<((parents[i][j / BLOCKSIZE]==NULL)?999:(parents[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
+ cout<<"\t"; \r
+ }\r
+ } \r
+ cout<<endl;\r
+ \r
+ cout<<"Children:\t";\r
+ for(int i=0;i<N;i++){\r
+ if(child_flags[i]){\r
+ for(int j=0;j<L;j++) cout<<"["<<((children[i][j / BLOCKSIZE]==NULL)?999:(children[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
+ cout<<"\t"; \r
+ }\r
+ } \r
+ cout<<endl;\r
+ \r
+ \r
+ \r
+// cout<<"parent_flag:\t";\r
+// for(int i=0;i<N;i++){\r
+// cout<<parent_flags[i]<<"\t";\r
+// } \r
+// cout<<endl;\r
+// \r
+// cout<<"children_flag:\t";\r
+// for(int i=0;i<N;i++){\r
+// cout<<child_flags[i]<<"\t";\r
+// } \r
+// cout<<endl;\r
+ \r
+ cout<<"genepool:\t";\r
+ for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
+ cout<<endl;\r
+ \r
+ cout<<"genetimes:\t";\r
+ for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
+ cout<<endl;\r
+ \r
+ cout<<"MCRA:\t";\r
+ for(int i=0;i<L;i++)cout<<MRCA[i]<<" ";\r
+ cout<<endl;\r
+ \r
+ cout<<"MCRAtime:\t";\r
+ for(int i=0;i<L;i++)cout<<MRCAtimes[i]<<" ";\r
+ cout<<endl;\r
+ \r
+ cout<<"fragments:\t";\r
+ for(int i=0;i<L;i++)cout<<fragments[i]<<" ";\r
+ cout<<endl;\r
+ \r
+ cout<<"NextFree="<<nextFree<<endl;\r
+}\r
+\r
+int *** AllocateIntPtrMatrix(int rows, int columns){\r
+ int *** result = new int ** [rows];\r
+\r
+ for (int i = 0; i < rows; i++)\r
+ result[i] = new int * [columns];\r
+ \r
+ return result;\r
+}\r
+ \r
+void FreeIntPtrMatrix(int *** & matrix, int rows){\r
+ \r
+ for (int i = 0; i < rows; i++)\r
+ delete [] matrix[i];\r
+\r
+ delete [] matrix;\r
+\r
+ matrix = NULL;\r
+} \r
+ \r
+void NewGeneration(){\r
+ \r
+ int *** ptrmatrix, * vector;\r
+ bool * boolvector;\r
+ \r
+// for(int i=0;i<maxN;i++){\r
+// if(parent_flags[i])printf("numBlock=%d parent=%d first=%d last=%d\n",numBLOCKs,i,parent_First[i],parent_Last[i]); \r
+// }\r
+ \r
+ SWAP(ptrmatrix, children, parents);\r
+ SWAP(vector, child_First, parent_First);\r
+ SWAP(vector, child_Last, parent_Last);\r
+ SWAP(boolvector, child_flags, parent_flags);\r
+ SWAP(vector, cblocks, pblocks);\r
+ \r
+ if(!noChange){\r
+ currentProfile++;\r
+ nPOP = (int)(popProfile[currentProfile].popsize.size());\r
+ N = popProfile[currentProfile].popStart.back()+popProfile[currentProfile].popsize.back();\r
+ noChange = true;\r
+ }\r
+\r
+ for (int i = 0; i < maxN; i++)\r
+ parent_flags[i] = 0;\r
+ \r
+// for (int i = 0; i < maxN; i++)\r
+// for (int j = 0; j < numBLOCKs; j++)\r
+// parents[i][j] = NULL;\r
+ \r
+ pblockcounter = 0;\r
+\r
+}\r
+\r
+ \r
+void AllocateMemory(vector< vector<bool> > &return_chromosome){\r
+ \r
+ \r
+// if(SNP<=0){\r
+// cout<<"Reserve size for return_chromosome="<<max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP))<<endl;\r
+// }\r
+// else{\r
+// cout<<"Reserve size for return_chromosome="<<SNP*numChr<<endl;\r
+// }\r
+// cout<<"Reserve size for working chromosome="<<max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP))<<endl;\r
+ \r
+ \r
+ return_chromosome.clear();\r
+ return_chromosome.resize(n);\r
+// for(int i=0;i<n;i++){\r
+// if(SNP<=0){\r
+// //return_chromosome[i].reserve(max(1,40*(int)(mutation*(double)numChr*(double)L*(double)length*(double)maxN/(double)nPOP)));\r
+// }\r
+// else{\r
+// //return_chromosome[i].reserve(SNP*numChr);\r
+// }\r
+// }\r
+ \r
+ chromosome.resize(n);\r
+ for(int i=0;i<n;i++){\r
+ chromosome[i].resize(L);\r
+// for(int j=0;j<L;j++){\r
+// if(SNP<=0){\r
+// //chromosome[i][j].reserve(max(1,40*(int)(mutation*(double)length*(double)maxN/(double)nPOP)));\r
+// }\r
+// else{\r
+// //chromosome[i][j].reserve(SNP/L+1);\r
+// }\r
+// }\r
+ }\r
+ \r
+ genepool = new int [poolsize];\r
+\r
+ numBLOCKs= (int)ceil((double)L / (double)BLOCKSIZE);\r
+ \r
+ pblockcounter = 0;\r
+\r
+}\r
+\r
+\r
+void AllocateMutationMemory(){\r
+ \r
+ fragmentLength = new int [L];\r
+ \r
+ branchFragment = new int [poolsize];\r
+ \r
+ geneIndex = new int [poolsize]; \r
+ \r
+}\r
+ \r
+ \r
+ \r
+void FreeMemory(){\r
+ \r
+ delete [] genepool;\r
+ \r
+ if(recombVector!=NULL)delete [] recombVector;\r
+ \r
+}\r
+ \r
+ \r
+void FreeMutationMemory(){\r
+ \r
+ delete [] fragmentLength;\r
+ delete [] geneIndex;\r
+ delete [] branchMutation;\r
+ \r
+ \r
+}\r
+\r
+void FreeCoalescentMemory(){\r
+ \r
+ delete [] MRCA;\r
+ delete [] MRCAtimes;\r
+\r
+ delete [] fragments;\r
+\r
+ FreeIntPtrMatrix(children,maxN);\r
+ \r
+ FreeIntPtrMatrix(parents,maxN);\r
+\r
+ delete [] child_flags;\r
+ delete [] parent_flags;\r
+\r
+ delete [] cblocks;\r
+ delete [] pblocks;\r
+ \r
+ delete [] parent_First;\r
+ delete [] parent_Last;\r
+\r
+ delete [] child_First;\r
+ delete [] child_Last;\r
+ \r
+}\r
+\r
+void reallocBlock(){\r
+ \r
+ pblocks = (int *)realloc(pblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
+ cblocks = (int *)realloc(cblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
+ BUFFER += INCREMENT; \r
+ \r
+}\r
+\r
+void setnull(int parent, int position){\r
+ \r
+ if (parent_flags[parent] == 0){\r
+ parent_First[parent]=position / BLOCKSIZE;\r
+ parent_Last[parent]=position / BLOCKSIZE; \r
+ parents[parent][position / BLOCKSIZE] = NULL; \r
+ }\r
+ else if(parent_First[parent]>position / BLOCKSIZE){\r
+ for(int i=position / BLOCKSIZE;i<parent_First[parent];i++){\r
+ parents[parent][i] = NULL;\r
+ }\r
+ parent_First[parent]=position / BLOCKSIZE;\r
+ }\r
+ else if(parent_Last[parent]<position / BLOCKSIZE){\r
+ for(int i=position / BLOCKSIZE;i>parent_Last[parent];i--){\r
+ parents[parent][i] = NULL;\r
+ }\r
+ parent_Last[parent]=position / BLOCKSIZE;\r
+ }\r
+ \r
+}\r
+\r
+\r
+\r
+void TouchParent(int parent, int position){\r
+\r
+ \r
+ setnull(parent,position);\r
+ \r
+ if (parents[parent][position / BLOCKSIZE] != NULL)return;\r
+ \r
+\r
+ if (pblockcounter >= n*numBLOCKs+BUFFER){\r
+ \r
+ int * pblocks_old=pblocks;\r
+ int * cblocks_old=cblocks;\r
+ \r
+ reallocBlock();\r
+ \r
+ for(int i=0;i<N;i++)\r
+ if(parent_flags[i])\r
+ for(int j=parent_First[i];j<=parent_Last[i];j++)\r
+ if(parents[i][j]!=NULL)parents[i][j] += pblocks - pblocks_old;\r
+ \r
+ for(int i=0;i<N;i++)\r
+ if(child_flags[i])\r
+ for(int j=child_First[i];j<=child_Last[i];j++)\r
+ if(children[i][j]!=NULL)children[i][j] += cblocks - cblocks_old;\r
+ \r
+ printf("pblock and cblock enlarged, BUFFER=%d.!\n",BUFFER);\r
+ \r
+ if(pblocks==NULL || cblocks==NULL)error("Blocks memory reallocation error.\n");\r
+ }\r
+\r
+ parents[parent][position / BLOCKSIZE] = pblocks + BLOCKSIZE * pblockcounter++;\r
+ \r
+ for (int i = 0; i < BLOCKSIZE; i++)\r
+ parents[parent] [position / BLOCKSIZE] [i] = -1;\r
+ \r
+ parent_flags[parent] = 1;\r
+ \r
+}\r
+\r
+void Initialize(){\r
+ \r
+ genetimes = new int [poolsize];\r
+ \r
+ MRCA = new int [L];\r
+ MRCAtimes = new int [L];\r
+\r
+ fragments = new int [L];\r
+\r
+ children = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
+ parents = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
+ \r
+ parent_First = new int [maxN];\r
+ parent_Last = new int [maxN];\r
+\r
+ child_First = new int [maxN];\r
+ child_Last = new int [maxN];\r
+\r
+ cblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));\r
+ pblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE))); \r
+ \r
+ child_flags = new bool [maxN];\r
+ parent_flags = new bool [maxN];\r
+ \r
+ \r
+ noChange = true;\r
+ \r
+ nPOP = (int)(popProfile[0].popsize.size());\r
+ \r
+ N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
+ \r
+ currentProfile = 0;\r
+ \r
+ nextFree = 0;\r
+ \r
+ for (int i = 0; i < N; i++)\r
+ child_flags[i] = 0;\r
+ \r
+ int count=0;\r
+ for(int k=0;k<nPOP;k++){ \r
+ for (int i = 0; i < nSample[k]; i++)\r
+ {\r
+ child_First[popProfile[0].popStart[k]+i]=0;\r
+ child_Last[popProfile[0].popStart[k]+i]=numBLOCKs-1;\r
+\r
+ child_flags[popProfile[0].popStart[k]+i] = 1;\r
+\r
+ for (int j = 0; j < numBLOCKs; j++)\r
+ children[popProfile[0].popStart[k]+i][j] = cblocks + (count * L + j * BLOCKSIZE);\r
+ \r
+ for (int j = 0; j < L; j++)\r
+ {\r
+ genepool[nextFree] = 1;\r
+ genetimes[nextFree] = 0;\r
+ children[popProfile[0].popStart[k]+i][j / BLOCKSIZE][j % BLOCKSIZE] = nextFree++;\r
+ }\r
+ \r
+ count++;\r
+ }\r
+ }\r
+\r
+ for (int i = 0; i < L; i++)\r
+ fragments[i] = n;\r
+\r
+ for (int i = 0; i < N; i++)\r
+ parent_flags[i] = 0;\r
+\r
+ activeSegments = L;\r
+ \r
+ //for (int i = 0; i < N; i++)\r
+ // for (int j = 0; j < numBLOCKs; j++)\r
+ // parents[i][j] = NULL;\r
+ \r
+ pblockcounter = 0;\r
+ \r
+}\r
+\r
+void InitializeMutation(vector< vector<bool> > &return_chromosome){\r
+ \r
+// return_chromosome.clear();\r
+// return_chromosome.resize(n);\r
+// for(int i=0;i<n;i++){\r
+// return_chromosome[i].reserve(max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP)));\r
+// }\r
+// \r
+// chromosome.resize(n);\r
+// for(int i=0;i<n;i++){\r
+// chromosome[i].resize(L);\r
+// for(int j=0;j<L;j++){\r
+// chromosome[i][j].reserve(max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP)));\r
+// }\r
+// }\r
+ \r
+// cout<<"WGCS-- chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
+ \r
+ for(int i=0;i<n;i++){\r
+ for(int j=0;j<L;j++){\r
+ chromosome[i][j].clear();\r
+ }\r
+ }\r
+ \r
+// cout<<"WGCS-- after clear() chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
+ \r
+ branchMutation = new int [nextFree];\r
+ for(int i=0;i<nextFree;i++)branchMutation[i]=0;\r
+\r
+}\r
+\r
+void PrepareMutation(){\r
+ \r
+ for(int i=0;i<L;i++)fragmentLength[i]=0;\r
+ \r
+ for(int i=0;i<nextFree;i++){\r
+ geneIndex[i] = -1; \r
+ branchFragment[i] = -1;\r
+ } \r
+ \r
+}\r
+ \r
+int SampleParent(int individual, int previous){\r
+ \r
+ if(fixedPOP){ \r
+ int population = individual/popProfile[0].popsize[0]; \r
+ \r
+ if (previous == -1 && nPOP>1){\r
+ if (globalRandom.Next() < migration){\r
+ int random = globalRandom.NextInt()%(nPOP-1); \r
+ if(population<=random)population=random+1;\r
+ else population=random;\r
+ }\r
+ }\r
+ \r
+ return globalRandom.NextInt() % (popProfile[0].popsize[0]) + popProfile[0].popStart[population];\r
+ }\r
+ else{\r
+ \r
+ if(noChange){\r
+ int population=0; \r
+ \r
+ for(int i=0;i<nPOP;i++){\r
+ if(individual>=popProfile[currentProfile].popStart[i])population=i;\r
+ else break;\r
+ }\r
+ \r
+// cout<<"No change: ind="<<individual<<" pop="<<population<<endl;\r
+ \r
+ if (previous == -1 && nPOP>1){\r
+ if (globalRandom.Next() < migration){\r
+ int random = globalRandom.NextInt()%(nPOP-1); \r
+ if(population<=random)population=random+1;\r
+ else population=random;\r
+ }\r
+ }\r
+ \r
+// cout<<"new pop="<<population<<endl;\r
+ \r
+ return globalRandom.NextInt() % (popProfile[currentProfile].popsize[population]) + popProfile[currentProfile].popStart[population];\r
+ }\r
+ else{\r
+ int population=0; \r
+ \r
+ for(int i=0;i<nPOP;i++){\r
+ if(individual>=popProfile[currentProfile].popStart[i])population=i; \r
+ else break;\r
+ }\r
+// cout<<"Change: ind="<<individual<<" pop="<<population<<endl;\r
+ \r
+ if (previous == -1 && nPOP>1){\r
+ if (globalRandom.Next() < migration){\r
+ int random = globalRandom.NextInt()%(nPOP-1); \r
+ if(population<=random)population=random+1;\r
+ else population=random;\r
+ }\r
+ }\r
+ \r
+// cout<<"new pop="<<population<<endl;\r
+ \r
+ if(popProfile[currentProfile].popChange[population].size()==1){\r
+// cout<<"To next pop="<<popProfile[currentProfile].popChange[population][0]<<endl;\r
+ \r
+ return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][0]]) \r
+ + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][0]]; \r
+ }\r
+ else{\r
+ double selectRandom = globalRandom.Next();\r
+ for(unsigned int i=0;i<popProfile[currentProfile].popChange[population].size();i++){\r
+ if(selectRandom < popProfile[currentProfile].selectProb[population][i]){\r
+ \r
+// cout<<"To next pop="<<popProfile[currentProfile].popChange[population][i]<<endl;\r
+ \r
+ return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][i]]) \r
+ + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][i]];\r
+ } \r
+ }\r
+ cout<<"Random number generator error!"<<endl;\r
+ exit(1);\r
+ }\r
+ \r
+ \r
+ }\r
+ \r
+ }\r
+}\r
+\r
+void readPopProfile(string filename, unsigned int numPopulation){\r
+ \r
+ int popProfile_index=0;\r
+ \r
+ popProfile.clear();\r
+ struct popProfilesStruct temp_popProfile;\r
+ popProfile.push_back(temp_popProfile); \r
+ \r
+ ifstream popFile;\r
+ string fileline;\r
+ char *term;\r
+ char *term1;\r
+ \r
+ popFile.open(filename.c_str());\r
+ if(!popFile)error("Population profile: %s cannot be opened!",filename.c_str());\r
+ \r
+ getline(popFile,fileline); // the first line is the population sizes at generation 0\r
+ \r
+ term = strtok ((char*)fileline.c_str()," \t"); // the first term is the generation\r
+ popProfile[popProfile_index].generation=atoi(term);\r
+ \r
+ if(popProfile[popProfile_index].generation!=0)error("The first line in population profile should be generation 0!");\r
+ \r
+ term = strtok (NULL, " \t"); // the second term and after are the sizes of that population\r
+ while(term!=NULL){\r
+ popProfile[popProfile_index].popsize.push_back(atoi(term));\r
+ term = strtok (NULL, " \t");\r
+ }\r
+\r
+ popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
+ for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
+ popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
+ }\r
+ \r
+ maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
+ \r
+ if(popProfile[0].popsize.size()!=numPopulation)error("The number of populations specified by -pop is not consistent with generation 0 in population profile:%s",filename.c_str());\r
+ \r
+ // read in the population correspondence to the next population profile\r
+ \r
+ \r
+ while(popFile.peek()!=EOF){\r
+ getline(popFile,fileline); // the even number lines are the correspondence of populations of different generation\r
+ popProfile[popProfile_index].popChange.resize(popProfile[popProfile_index].popsize.size());\r
+ \r
+ term = strtok ((char*)fileline.c_str(),"- \t"); // the FROM population\r
+ while(term!=NULL){\r
+ \r
+ term1 = strtok (NULL, "- \t"); // the TO population\r
+\r
+ popProfile[popProfile_index].popChange[atoi(term)-1].push_back(atoi(term1)-1);\r
+ \r
+ term = strtok (NULL, "- \t");\r
+ }\r
+ \r
+ popProfile_index++;\r
+ popProfile.push_back(temp_popProfile);\r
+ \r
+ if(popFile.peek()==EOF)error("Error in population profile: the last line should specify the population sizes.");\r
+ \r
+ // read the next population profile\r
+ \r
+ getline(popFile,fileline); // the population sizes\r
+ \r
+ term = strtok ((char*)fileline.c_str()," \t"); // the first term is the generation\r
+ popProfile[popProfile_index].generation=atoi(term);\r
+ \r
+ term = strtok (NULL, " \t"); // the second term and after are the sizes of that population\r
+ while(term!=NULL){\r
+ popProfile[popProfile_index].popsize.push_back(atoi(term));\r
+ term = strtok (NULL, " \t");\r
+ }\r
+ \r
+ popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
+ for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
+ popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
+ }\r
+ \r
+ if(maxN < popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back())\r
+ maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
+ } \r
+ \r
+ for(unsigned int i=0;i<popProfile.size();i++){\r
+ popProfile[i].selectProb.resize(popProfile[i].popChange.size());\r
+ for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
+ float sum=0.0;\r
+ for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)sum += popProfile[i+1].popsize[popProfile[i].popChange[j][k]];\r
+ \r
+ popProfile[i].selectProb[j].push_back(((float)popProfile[i+1].popsize[popProfile[i].popChange[j][0]])/sum);\r
+ for(unsigned int k=1;k<popProfile[i].popChange[j].size();k++)\r
+ popProfile[i].selectProb[j].push_back(popProfile[i].selectProb[j].back()+popProfile[i+1].popsize[popProfile[i].popChange[j][k]]/sum);\r
+ }\r
+ }\r
+ // check for consistence\r
+ \r
+ int min, max;\r
+ \r
+ for(unsigned int i=0;i<popProfile.size()-1;i++){\r
+ min=99999;max=-1;\r
+ for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
+ if(popProfile[i].popChange[j].size()==0)error("One population does not have its parent population: profile file line %d, population %d\n",i+1,j+1); \r
+ for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++){\r
+ if(min>popProfile[i].popChange[j][k])min=popProfile[i].popChange[j][k];\r
+ if(max<popProfile[i].popChange[j][k])max=popProfile[i].popChange[j][k];\r
+ }\r
+ } \r
+ if(min!=0 || max!=(int)(popProfile[i+1].popsize.size()-1))\r
+ error("The population changes does not consistent with the next population profile. line=%d, min=%d, max=%d, num POP in next line=%d\n",i+1,min+1,max+1,popProfile[i+1].popsize.size());\r
+ }\r
+ \r
+}\r
+\r
+\r
+void readRecombination(string filename, int pieces){\r
+\r
+ ifstream recombFile;\r
+ string fileline;\r
+ char *term;\r
+ \r
+ vector<float> recDistribution;\r
+ vector<float> recRate;\r
+ \r
+ float sum=0.0;\r
+ float random;\r
+ \r
+ recombFile.open(filename.c_str());\r
+ if(!recombFile)error("Recombination profile: %s cannot be opened!",filename.c_str());\r
+ \r
+ getline(recombFile,fileline); // the first line is head of distribution table\r
+ term = strtok ((char*)fileline.c_str()," \t");\r
+ term = strtok (NULL, " \t"); \r
+ \r
+ if(strcmp(term,"Freq")==0 || strcmp(term,"freq")==0){\r
+ while(recombFile.peek()!=EOF){\r
+ getline(recombFile,fileline);\r
+ \r
+ term = strtok ((char*)fileline.c_str()," \t"); // the first number is the recombination rate\r
+ recRate.push_back((float)(atof(term)));\r
+ \r
+ if(atof(term)<0 || atof(term)>1)error("Recombination rate should be between 0 and 1. input=%f",atof(term));\r
+ \r
+ term = strtok (NULL, " \t"); // the second number is the frequency of recombination rate\r
+ recDistribution.push_back((float)(atof(term)));\r
+ \r
+ if(atof(term)<=0)error("Frequency should be positive. input=%f",atof(term));\r
+ \r
+ sum += (float)(atof(term));\r
+ }\r
+ \r
+ if(recRate.size()!=recDistribution.size() || recRate.size()==0)error("Errors in the recombination profile: %s",filename.c_str());\r
+ \r
+ // normalize the frequency \r
+ for(unsigned int i=0;i<recDistribution.size();i++) recDistribution[i] /= sum;\r
+ \r
+ // compute the cumulative distribution\r
+ for(unsigned int i=1;i<recDistribution.size();i++) recDistribution[i] += recDistribution[i-1];\r
+ \r
+ recombVector = new float [pieces-1];\r
+ \r
+ for(int i=0;i<pieces-1;i++){\r
+ random = (float)(globalRandom.Next());\r
+ for(unsigned int j=0;j<recDistribution.size();j++){\r
+ if(random < recDistribution[j]){\r
+ recombVector[i]=recRate[j];\r
+ break; \r
+ }\r
+ }\r
+ }\r
+ \r
+ //check the input\r
+ cout<<"*** RECOMBINATION RATES PROFILE ***"<<endl;\r
+ cout<<"Recombination_Rate\tCumulative_frequency"<<endl;\r
+ for(unsigned int i=0;i<recRate.size();i++){\r
+ cout<<recRate[i]<<"\t\t\t"<<recDistribution[i]<<endl; \r
+ } \r
+ \r
+ }\r
+ else if(strcmp(term,"Pos")==0 || strcmp(term,"pos")==0){\r
+ float currentRec=0.0,nextRec=0.0;\r
+ int nextPos=-1;\r
+ \r
+ if(recombFile.peek()!=EOF){\r
+ getline(recombFile,fileline);\r
+ term = strtok ((char*)fileline.c_str()," \t");\r
+ currentRec=(float)(atof(term));\r
+ \r
+ if(recombFile.peek()!=EOF){\r
+ getline(recombFile,fileline);\r
+ term = strtok ((char*)fileline.c_str()," \t");\r
+ nextRec=(float)(atof(term));\r
+ term = strtok (NULL, " \t"); \r
+ nextPos=atoi(term);\r
+ }\r
+ else{\r
+ nextPos=-1;\r
+ }\r
+ \r
+ }\r
+ else{\r
+ error("Recombination file does not contain recombination rate information.\n");\r
+ }\r
+ \r
+ recombVector = new float [pieces-1];\r
+ \r
+ for(int i=0;i<pieces-1;i++){\r
+ if(i+1==nextPos){\r
+ currentRec=nextRec;\r
+ if(recombFile.peek()!=EOF){\r
+ getline(recombFile,fileline);\r
+ term = strtok ((char*)fileline.c_str()," \t");\r
+ nextRec=(float)(atof(term));\r
+ term = strtok (NULL, " \t"); \r
+ nextPos=atoi(term);\r
+ }\r
+ else{\r
+ nextPos=-1;\r
+ }\r
+ }\r
+ \r
+ recombVector[i]=currentRec;\r
+ }\r
+ \r
+ \r
+ \r
+ }\r
+ else{\r
+ error("The second column is not \"freq\" or \"pos\".\n");\r
+ }\r
+ \r
+ \r
+\r
+ \r
+ cout<<"The recombination rates between fragments are:"<<endl<<"\t";\r
+ for(int i=0;i<pieces-1;i++)cout<<"("<<i+1<<") "<<recombVector[i]<<" ";\r
+ cout<<"("<<pieces<<")"<<endl;\r
+ cout<<"*** END OF RECOMBINATION RATES PROFILE ***"<<endl;\r
+ \r
+}\r
+\r
+void drawtrees(){\r
+ \r
+ map<int,string > tree;\r
+ ostringstream temp;\r
+ \r
+ \r
+ for(int i=0;i<L;i++){\r
+ tree.clear();\r
+ \r
+ for(int j=0;j<n;j++){\r
+ if(tree[genepool[j*L+i]]!=""){\r
+ temp.str("");\r
+ temp <<tree[genepool[j*L+i]]<<","<<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
+ tree[genepool[j*L+i]]= temp.str();\r
+ }\r
+ else{\r
+ temp.str("");\r
+ temp <<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
+ tree[genepool[j*L+i]]= temp.str();\r
+ }\r
+ }\r
+ \r
+ \r
+ for(int j=n*L;j<nextFree;j++){\r
+ if(tree[j]!=""){\r
+ if(genepool[j]==0){\r
+ cout<<"The tree for fragment "<<i+1<<"= ("<<tree[j]<<");"<<endl;\r
+ }\r
+ else{ \r
+ if(tree[genepool[j]]!=""){\r
+ temp.str("");\r
+ temp <<tree[genepool[j]]<<",("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
+ tree[genepool[j]]= temp.str();\r
+ }\r
+ else{\r
+ temp.str("");\r
+ temp <<"("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
+ tree[genepool[j]]=temp.str();\r
+ }\r
+ }\r
+ }\r
+ }\r
+ }\r
+ \r
+ tree.clear();\r
+}\r
+\r
+\r
+\r
+\r
+void genome(string popSize, // effective population size of each population\r
+ int nSubPOP, // number of populations\r
+ vector<int> & nSubSample, // number of samples (haplotypes) draw from each populations\r
+ int numPieces, // number of fragments for each sample (chromosome)\r
+ int pieceLen, // length in base pair of each fragment\r
+ int numIndepRegion, // number of independent regions (independent chromosome)\r
+ int s, // fixed number of SNPs want to simulate, randomly place s SNPs on the genealogy, -1 means the number of mutation follows a Poisson distribution \r
+ string rec, // recombination rate between consecutive fragments per generation\r
+ double mut, // mutation rate per generation per base pair\r
+ double mig, // migration rate per generation\r
+ vector< vector<bool> > &return_chromosome, // the return chromosome by connecting all independent chromosome into one long chromosome\r
+ long t, // random seed \r
+ int drawtree, // drawtree=1 output the genealogy trees for each fragment and chromosome; drawtree=0 do not output the trees \r
+ bool printparameters // =1 print out all input parameters, =0 do not print input parameters\r
+ )\r
+{ \r
+ if(atoi(popSize.c_str())>0){ // if the population size is fixed during evoluation\r
+ N = atoi(popSize.c_str())*nSubPOP;\r
+ maxN = N;\r
+ \r
+ fixedPOP = true;\r
+ \r
+ popProfile.clear();\r
+ struct popProfilesStruct temp_popProfile;\r
+ popProfile.push_back(temp_popProfile);\r
+ \r
+ popProfile[0].generation = 0;\r
+ \r
+ for(int i=0;i<nSubPOP;i++)popProfile[0].popsize.push_back(atoi(popSize.c_str()));\r
+ for(int i=0;i<nSubPOP;i++)popProfile[0].popStart.push_back(i*atoi(popSize.c_str()));\r
+ \r
+ cout<<"*** POPULATION PROFILE ***"<<endl;\r
+ cout<<"The size of each population is fixed to "<<atoi(popSize.c_str())<<endl;\r
+ cout<<"Sizes of populations = ";\r
+ for(unsigned int j=0;j<popProfile[0].popsize.size();j++)cout<<popProfile[0].popsize[j]<<" ";\r
+ cout<<endl;\r
+// cout<<"POP Start index = ";\r
+// for(int j=0;j<popProfile[0].popStart.size();j++)cout<<popProfile[0].popStart[j]<<" ";\r
+// cout<<endl;\r
+ cout<<"*** END OF POPULATION PROFILE ***"<<endl<<endl;\r
+ \r
+ }\r
+ else{ // popSize stores the filename of the population profile during evoluation\r
+ fixedPOP = false; \r
+ \r
+ currentProfile = 0;\r
+ \r
+ readPopProfile(popSize,nSubPOP);\r
+ \r
+ N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
+ \r
+ // print out the population profile information.\r
+ \r
+ cout<<"*** POPULATION PROFILE ***"<<endl;\r
+ cout<<"Maximum total size of populations at all generations: N = "<<maxN<<endl;\r
+ cout<<"Total size of populations at generation 0: N = "<<N<<endl<<endl;\r
+ \r
+ for(unsigned int i=0;i<popProfile.size();i++){\r
+ cout<<"Generation = "<<popProfile[i].generation<<endl;\r
+ cout<<"Sizes of populations= ";\r
+ for(unsigned int j=0;j<popProfile[i].popsize.size();j++)cout<<popProfile[i].popsize[j]<<" ";\r
+ cout<<endl;\r
+// cout<<"POP Start index = ";\r
+// for(int j=0;j<popProfile[i].popStart.size();j++)cout<<popProfile[i].popStart[j]<<" ";\r
+// cout<<endl;\r
+ if(popProfile[i].popChange.size()>0){\r
+ cout<<"Populations change backward in time:"<<endl;\r
+ for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
+ cout<<" From "<<j+1<<" to ";\r
+ for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)cout<<popProfile[i].popChange[j][k]+1<<" ";\r
+ cout<<endl;\r
+ }\r
+ \r
+// cout<<"Selection cumulative probability:"<<endl; \r
+// for(int j=0;j<popProfile[i].selectProb.size();j++){\r
+// cout<<" From "<<j+1<<" with prob = ";\r
+// for(int k=0;k<popProfile[i].selectProb[j].size();k++)cout<<popProfile[i].selectProb[j][k]<<" ";\r
+// cout<<endl;\r
+// }\r
+ }\r
+ if(i==popProfile.size()-1)cout<<"*** END OF POPULATION PROFILE ***"<<endl;\r
+ cout<<endl;\r
+ }\r
+ }\r
+ \r
+ n=0;\r
+ nPOP=nSubPOP;\r
+ nSample.resize(nPOP);\r
+ for(int i=0;i<nPOP;i++){\r
+ if(popProfile[0].popsize[i]<nSubSample[i])error("Population size should be larger than sample size!");\r
+ n += nSubSample[i];\r
+ nSample[i]=nSubSample[i];\r
+ }\r
+ \r
+ L = numPieces;\r
+ length = pieceLen;\r
+ numChr = numIndepRegion;\r
+ SNP=s;\r
+ \r
+ if(atof(rec.c_str())>0){ // if the recombination rate is fixed\r
+ recombination = atof(rec.c_str());\r
+ }\r
+ else{ // the distribution of recombination rate is described in the file (filename stored in rec)\r
+ recombination = -1;\r
+ readRecombination(rec,L);\r
+ }\r
+ \r
+ \r
+ \r
+ mutation = mut;\r
+ migration = mig;\r
+ \r
+ if(t<=0)t=(long)(time(0)); // if t >0 we set the seed to t\r
+ cout<<endl<<"random seed="<<t<<endl;\r
+ globalRandom.Reset(t); \r
+ \r
+ if(printparameters){\r
+ printf("popSize=%s, n=%d, nPOP=%d, L=%d, length=%d, numChr=%d, SNP=%d, rec=%s, mut=%.4e, mig=%.4e, drawtree=%d, printparameters=%d\n",popSize.c_str(),n,nPOP,L,length,numChr,SNP,rec.c_str(),mut,mig,drawtree,printparameters);\r
+ cout<<"subSample = ";\r
+ for(int i=0;i<nPOP;i++){\r
+ cout<<nSample[i]<<" ";\r
+ }\r
+ cout<<endl;\r
+ }\r
+ \r
+ if(N<=0 || n<=0 || L<=0 || length <=0 || numChr <=0 || atof(rec.c_str()) <0 || mut <0 || mig<0){\r
+ cout<<"Input parameters have to be positive."<<endl;\r
+ printf("N=%d, n=%d, L=%d, length=%d, numChr=%d, recombination=%s, mutation=%lf, migration=%lf\n",N,n,L,length,numChr,rec.c_str(),mut,mig);\r
+ exit(0);\r
+ }\r
+\r
+ poolsize = 2 * n * L - L;\r
+\r
+ AllocateMemory(return_chromosome);\r
+\r
+// cout<<"memory OK!"<<endl<<endl;\r
+ \r
+ for(int chr=0;chr<numChr;chr++){\r
+ \r
+ Initialize();\r
+ \r
+// cout<<"Initialization OK!"<<endl;\r
+ \r
+ bool done = false;\r
+ int generation = 0, units = n * L;\r
+ \r
+ while (!done)\r
+ {\r
+ // printf("\n\nGeneration %d -- Tracking %d units of sequence [pool used %.2f%%] %c",\r
+ // generation++, units, nextFree * 100. / poolsize,\r
+ // !debug ? '\r' : '\n');\r
+ \r
+ generation++;\r
+ \r
+ if(!fixedPOP && currentProfile<popProfile.size()-1)\r
+ if(generation == popProfile[currentProfile+1].generation)noChange=false;\r
+ \r
+ //if (debug)\r
+ // {\r
+ // printf("F : ");\r
+ // for (int i = 0; i < L; i++)\r
+ // printf("%3d ", fragments[i]);\r
+ // printf("\n");\r
+ // }\r
+ \r
+ // Position of the first segment allocated to the current generation\r
+ int generationStart = nextFree;\r
+ \r
+ // For each individual in the population\r
+ for (int i = 0; i < N && !done; i++)\r
+ if (child_flags[i])\r
+ {\r
+ \r
+ /*if (debug) printf("%2d : ", i);*/\r
+ \r
+ // The position we are currently tracking and its distance\r
+ // from the previous position\r
+ int distance = 0, parent = -1;\r
+ int startPiece = 0;\r
+ float recombProb;\r
+ \r
+ for (int block = child_First[i]; block <= child_Last[i]; block++)\r
+ if (children[i][block] == NULL)\r
+ distance += BLOCKSIZE;\r
+ else\r
+ {\r
+ int pos = BLOCKSIZE * block;\r
+ int pos_in_block;\r
+ int pos_bound = (L < BLOCKSIZE * (block + 1) ? L : BLOCKSIZE * (block + 1) );\r
+\r
+ while (pos < pos_bound )\r
+ {\r
+ \r
+ pos_in_block = pos % BLOCKSIZE;\r
+ // Skip through uninformative positions along each chromosome\r
+ if (children[i][block][pos_in_block] == -1)\r
+ {\r
+ //if (debug) printf(" . \n");\r
+ distance++;\r
+ pos++;\r
+ continue;\r
+ }\r
+ \r
+ // Pick an ancestor at random for the current piece of chromosome\r
+ \r
+ if(recombination>0){ // if the recombination rate is fixed over the genome\r
+ recombProb = (float)(pow(1.0 - recombination, distance));\r
+ }\r
+ else{ // if the recombination rate is specified by the profile\r
+ recombProb = 1;\r
+ for(int recV_iter=startPiece;recV_iter<pos;recV_iter++)recombProb *= (1-recombVector[recV_iter]);\r
+ }\r
+ \r
+ \r
+ if (parent == -1 ||\r
+ distance > 0 && globalRandom.Next() > recombProb){\r
+ parent = SampleParent(i, parent);\r
+ }\r
+ \r
+ \r
+ TouchParent(parent, pos);\r
+ \r
+ \r
+ // Reset distance between segments\r
+ distance = 1;\r
+ startPiece=pos;\r
+\r
+ //if (debug) printf("parent=%3d pos=%d address=%d address2=%d\n", parent,pos,parents[parent][block],parents[parent][pos/BLOCKSIZE]);\r
+ \r
+ // Check if we sampled a new ancestral segment ...\r
+ if (parents[parent][block][pos_in_block] == -1)\r
+ {\r
+ // We delay sampling a new gene from the pool until we know\r
+ // a coalescent event occured ...\r
+ parents[parent][block][pos_in_block] = children[i][block][pos_in_block];\r
+ \r
+ }\r
+ // Or if a coalescent event occured\r
+ else\r
+ {\r
+ // If we previously delayed sampling this parent\r
+ if (parents[parent][block][pos_in_block] < generationStart)\r
+ {\r
+ genetimes[nextFree] = generation;\r
+ genepool[parents[parent][block][pos_in_block]] = nextFree;\r
+ parents[parent][block][pos_in_block] = nextFree++;\r
+\r
+ if (nextFree == poolsize && units != 2)\r
+ error("Genepool exhausted\n");\r
+ }\r
+\r
+ genepool[children[i][block][pos_in_block]] = parents[parent][block][pos_in_block];\r
+ \r
+ fragments[pos]--;\r
+ units--;\r
+\r
+ if (fragments[pos] == 1)\r
+ {\r
+ // Reached the MRCA for this fragment\r
+ MRCAtimes[pos] = generation;\r
+ MRCA[pos] = parents[parent][block][pos_in_block];\r
+ \r
+ genepool[parents[parent][block][pos_in_block]]=0;\r
+ \r
+ parents[parent][block][pos_in_block] = -1;\r
+ units--;\r
+ \r
+ // One less segment to track\r
+ if (--activeSegments == 0)\r
+ {\r
+ done = true;\r
+ break;\r
+ }\r
+ }\r
+ }\r
+ pos++;\r
+ }\r
+ }\r
+ }\r
+ \r
+ NewGeneration();\r
+ \r
+ }\r
+ \r
+ if(drawtree){\r
+ cout<<endl<<"Genealogy trees for chromosome:"<<chr+1<<endl;\r
+ drawtrees();\r
+ } \r
+ \r
+ // assign mutations\r
+\r
+ cout<<"Simulate mutation for Chromosome "<<chr+1<<endl;\r
+ \r
+ FreeCoalescentMemory();\r
+ \r
+ cout<<"Coalescent Memory free"<<endl;\r
+ \r
+ InitializeMutation(return_chromosome);\r
+\r
+ cout<<"Mutation process initialized"<<endl;\r
+ \r
+// cout<<"genepool:\t";\r
+// for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
+// cout<<endl;\r
+// \r
+// cout<<"genetimes:\t";\r
+// for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
+// cout<<endl;\r
+// \r
+// cout<<"nextFree="<<nextFree<<endl;\r
+ \r
+// cout<<"mutation:\t";\r
+// for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
+// cout<<endl;\r
+ \r
+ if(SNP<=0){\r
+ for(int i=0;i<nextFree;i++){\r
+ if(genepool[i]!=0)branchMutation[i]=poissonInt((double)length*(double)(genetimes[genepool[i]]-genetimes[i])*mutation); \r
+ }\r
+ }\r
+ else{\r
+ unsigned long * cumulativeCount = new unsigned long [nextFree];\r
+ \r
+ if(genepool[0]!=0)cumulativeCount[0] = genetimes[genepool[0]]-genetimes[0];\r
+ else cumulativeCount[0] = 0;\r
+ \r
+ for(int i=1;i<nextFree;i++){\r
+ if(genepool[i]!=0)cumulativeCount[i] = cumulativeCount[i-1] + genetimes[genepool[i]]-genetimes[i]; \r
+ else cumulativeCount[i] = cumulativeCount[i-1]; \r
+ }\r
+ \r
+ unsigned long totalGeneration = cumulativeCount[nextFree - 1];\r
+ \r
+// cout<<"Total Generation = "<<totalGeneration<<endl;\r
+// \r
+// if((unsigned long)SNP>totalGeneration){\r
+// cout<<"Required number of SNPs("<<SNP<<") is more than total number of generations("<<totalGeneration<<")!"<<endl; \r
+// }\r
+ \r
+ for(int i=0;i<SNP;i++){\r
+ \r
+ unsigned long randomLong = globalRandom.NextInt()%totalGeneration;\r
+ \r
+ int begin = 0;\r
+ int end = nextFree - 1;\r
+ int middle = (begin+end)/2;\r
+ \r
+ while(1){\r
+ if(cumulativeCount[middle]<randomLong){\r
+ begin = middle;\r
+ }\r
+ else{\r
+ end = middle;\r
+ }\r
+ \r
+ if(end == begin+1)break;\r
+ \r
+ middle = (begin+end)/2;\r
+ }\r
+ \r
+ if(cumulativeCount[end] < randomLong || (cumulativeCount[begin] >= randomLong && begin != 0)){\r
+ cout<<"searching error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl; \r
+ error("see above message!\n");\r
+ }\r
+ \r
+ int sampleIndex;\r
+ \r
+ if(cumulativeCount[begin]>=randomLong)sampleIndex = begin;\r
+ else sampleIndex = end;\r
+ \r
+ if(genepool[sampleIndex]==0){\r
+ cout<<"genepool error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl; \r
+ error("see above message!\n");\r
+ }\r
+ \r
+ branchMutation[sampleIndex]++;\r
+ }\r
+ \r
+ delete [] cumulativeCount;\r
+ \r
+ }\r
+ \r
+ delete [] genetimes;\r
+ \r
+ cout<<"Mutation assigned"<<endl;\r
+ \r
+// cout<<"mutation:\t";\r
+// for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
+// cout<<endl;\r
+\r
+ AllocateMutationMemory();\r
+ \r
+ cout<<"Mutation Memory allocated"<<endl;\r
+ \r
+ PrepareMutation();\r
+ \r
+ for(int i=0;i<n;i++){\r
+ for(int j=0;j<L;j++){\r
+ if(branchMutation[i*L+j]>0){\r
+ geneIndex[i*L+j] = fragmentLength[j];\r
+ fragmentLength[j] += branchMutation[i*L+j];\r
+ }\r
+ branchFragment[i*L+j] = j;\r
+ }\r
+ } \r
+ \r
+ \r
+ for(int i=0;i<nextFree;i++){\r
+ if(genepool[genepool[i]]!=0 && genepool[i]!=0){\r
+ if(branchMutation[genepool[i]]>0 && branchFragment[genepool[i]]== -1 ){\r
+ geneIndex[genepool[i]] = fragmentLength[branchFragment[i]];\r
+ fragmentLength[branchFragment[i]] += branchMutation[genepool[i]];\r
+ }\r
+ branchFragment[genepool[i]]=branchFragment[i];\r
+ }\r
+ }\r
+ \r
+ //cout<<"branchfragment done"<<endl;\r
+ \r
+// cout<<"branchFragment:\t";\r
+// for(int i=0;i<nextFree;i++)cout<<branchFragment[i]<<" ";\r
+// cout<<endl;\r
+\r
+// cout<<"geneIndex:\t";\r
+// for(int i=0;i<nextFree;i++)cout<<geneIndex[i]<<" ";\r
+// cout<<endl;\r
+// \r
+// cout<<"fragmentLength:\t";\r
+// for(int i=0;i<L;i++)cout<<fragmentLength[i]<<" ";\r
+// cout<<endl;\r
+ \r
+\r
+// cout<<"genepool genetime mutation frag index\n";\r
+// for(int i=0;i<nextFree;i++)cout<<genepool[i]<<" "<<genetimes[i]<<" "<<branchMutation[i]<<" "<<branchFragment[i]<<" "<<geneIndex[i]<<endl;\r
+// cout<<endl;\r
+ \r
+\r
+ \r
+ delete [] branchFragment;\r
+ \r
+ cout<<"Internal memory free"<<endl;\r
+ \r
+ // print the fragment index for each SNP\r
+ cout<<"SNP genetic position scaled to [0,1]:"<<endl;\r
+ if(L>1){\r
+ for(int i=0;i<L;i++){\r
+ for(int j=0;j<fragmentLength[i];j++)cout<<(float)i/(float)(L-1)<<" "; \r
+ }\r
+ }\r
+ else{\r
+ cout<<"Only one fragment to be simulated. No recombination between SNPs.";\r
+ }\r
+ cout<<endl;\r
+ \r
+ int totalLength=0;\r
+ if(SNP <=0){ \r
+ for(int i=0;i<L;i++)totalLength += fragmentLength[i];\r
+ }\r
+ \r
+ \r
+ for(int i=0;i<n;i++){\r
+ for(int j=0;j<L;j++){\r
+ chromosome[i][j].resize(fragmentLength[j],0);\r
+\r
+ for(int k=0;k<branchMutation[i*L+j];k++)chromosome[i][j][geneIndex[i*L+j]+k] = 1;\r
+ \r
+ int current=i*L+j;\r
+ \r
+ while(genepool[genepool[current]]!=0){\r
+ for(int k=0;k<branchMutation[genepool[current]];k++)chromosome[i][j][geneIndex[genepool[current]]+k]=1;\r
+ current=genepool[current];\r
+ }\r
+ }\r
+ }\r
+ \r
+ cout<<"Chromosome done"<<endl;\r
+ \r
+ FreeMutationMemory();\r
+ \r
+ if(SNP <=0){ \r
+ for(int i=0;i<n;i++){\r
+ return_chromosome[i].reserve(return_chromosome[i].size()+totalLength);\r
+ }\r
+ }\r
+ \r
+ for(int i=0;i<n;i++){\r
+ for(int j=0;j<L;j++){\r
+ return_chromosome[i].insert(return_chromosome[i].end(),chromosome[i][j].begin(),chromosome[i][j].end());\r
+ }\r
+ } \r
+ \r
+ cout<<"Return chromosome done"<<endl;\r
+ \r
+ // print the chromosomes\r
+// for(int i=0;i<n;i++){\r
+// printf("Chr %d: ",i);\r
+// for(int j=0;j<L;j++){\r
+// for(int k=0;k<chromosome[i][j].size();k++)cout<<chromosome[i][j][k];\r
+// cout<<" "; \r
+// }\r
+// cout<<endl;\r
+// } \r
+// \r
+// \r
+// for(int i=0;i<n;i++){\r
+// printf("return Chr %d: ",i);\r
+// for(int j=0;j<return_chromosome[i].size();j++){\r
+// cout<<return_chromosome[i][j];\r
+// }\r
+// cout<<endl;\r
+// }\r
+\r
+ \r
+ \r
+ }\r
+ \r
+ FreeMemory();\r
+ \r
+ printf("\nDone simulating ARG ...\n");\r
+ \r
+ \r
+ \r
+}\r
+\r
+\r
--- /dev/null
+////////////////////////////////////////////////////////////////////// \r
+// genome.h \r
+//////////////////////////////////////////////////////////////////////////////\r
+// COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions\r
+// are met:\r
+//\r
+// 1. Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// 2. Redistributions in binary form must reproduce the above copyright\r
+// notice, this list of conditions and the following disclaimer in the\r
+// documentation and/or other materials provided with the distribution.\r
+//\r
+// 3. The names of its contributors may not be used to endorse or promote\r
+// products derived from this software without specific prior written\r
+// permission.\r
+//\r
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#include <vector>\r
+#include <string>\r
+\r
+using namespace std;\r
+\r
+void genome(string popSize, // effective population size of each population\r
+ int nSubPOP, // number of populations\r
+ vector<int> & nSubSample, // number of samples (haplotypes) draw from each populations\r
+ int numPieces, // number of fragments for each sample (chromosome)\r
+ int pieceLen, // length in base pair of each fragment\r
+ int numIndepRegion, // number of independent regions (independent chromosome)\r
+ int s, // fixed number of SNPs want to simulate, randomly place s SNPs on the genealogy \r
+ string rec, // recombination rate between consecutive fragments per generation\r
+ double mut, // mutation rate per generation per base pair\r
+ double mig, // migration rate per generation\r
+ vector< vector<bool> > &return_chromosome, // the return chromosome by connecting all independent chromosome into one long chromosome\r
+ long t, // random seed \r
+ int drawtree, // drawtree=1 output the genealogy trees for each fragment and chromosome; drawtree=0 do not output the trees \r
+ bool printparameters =false // =1 print out all input parameters, =0 do not print input parameters\r
+ );\r
+\r
+\r
+\r
+ \r
--- /dev/null
+# module name
+NAME = genome
+
+# switches
+SW = -O2 -Wall
+
+# libreries
+LIB = -lm
+
+# compiler
+CC = g++
+
+# main module
+
+OBJ = Main.o genome.o Random.o Error.o stochastic.o
+
+$(NAME): $(OBJ)
+ $(CC) -o $(NAME) $(OBJ) $(LIB) $(SW)
+
+.cpp.o :
+ $(CC) $(SW) -o $@ -c $*.cpp
+
+.SUFFIXES : .cpp .c .o $(SUFFIXES)
+
+clean :
+ rm -rf *.o core*
+
--- /dev/null
+# module name
+NAME = genome-32bit
+
+# switches
+SW = -O2 -Wall -m32
+
+# libreries
+LIB = -lm
+
+# compiler
+CC = g++
+
+# main module
+
+OBJ = Main.o genome.o Random.o Error.o stochastic.o
+
+$(NAME): $(OBJ)
+ $(CC) -o $(NAME) $(OBJ) $(LIB) $(SW)
+
+.cpp.o :
+ $(CC) $(SW) -o $@ -c $*.cpp
+
+.SUFFIXES : .cpp .c .o $(SUFFIXES)
+
+clean :
+ rm -rf *.o core*
+
--- /dev/null
+0 500 500 500
+1-1 2-1 3-2
+10 700 300
+1-1 1-2 2-3
+20 500 200 300
+1-1 2-1 3-1
+30 500
\ No newline at end of file
--- /dev/null
+
+GENOME: coalescent-based whole genome simulator
+(c) 2006-2009 Liming Liang, Goncalo Abecasis
+
+
+
+DISCLAIMER
+==========
+
+This is version of GENOME is provided to you free of charge. It comes with
+no guarantees. If you report bugs and provide helpful comments, the next
+version will be better.
+
+
+DISTRIBUTION
+============
+
+The latest version of GENOME is available at:
+
+ http://www.sph.umich.edu/csg/liang/genome
+
+If you use GENOME please register by e-mailing lianglim@umich.edu.
+
+Code for GENOME is subject to specific
+license conditions -- please see file LICENSE.GENOME for additional
+information.
+
+Code for the Mersenne Twister random number generator is subject to specific
+license conditions -- please see file LICENSE.twister for additional
+information.
+
+
+
+
+BRIEF INSTRUCTIONS
+==================
+
+A complete manual is available at:
+
+ http://www.sph.umich.edu/csg/liang/genome
+
+
+Parameters and default values are listed below:
+
+ -pop number of subpopulations and size of subsamples [ 2 10 10 ]
+ -N effective size of each subpopulation or the filename for population profile [10000]
+ -c number of independent regions (chromosomes) to simulate [1]
+ -pieces number of fragments per independent region [100]
+ -len length in base of each fragment [10000]
+ -s fixed number of SNPs per independent region (chromosome), -1 = number of SNPs follows Poisson distribution [-1]
+ -rec recombination rate bewteen consecutive fragments or the filename for recombination rate distribution [0.0001]
+ -mut mutation rate per generation per base pair [1e-08]
+ -mig migration rate per generation per individual [0.00025]
+ -seed random seed, -1 = use time as the seed [-1]
+ -tree 1=draw the genealogy trees, 0=do not output [0]
+ -maf Output SNPs with minor allele frequence greater than [0]
+ -prop To keep this proportion of SNPs with MAF < the value of -maf parameter [1]
+
+
+
+An example command line is:
+
+ genome -pop 2 3 5 -N 100 -tree 1
+
+This command will simulate 3 and 5 sequences from two populations respectively.
+Each population is of size 100. Other parameters will take the default values listed in "[]".
+The genealogy trees in Newick format will be output.
+
+recombination.txt contains an example of the recombination rates profile.
+population.txt contains an example of the population history profile.
+For detail of these two files please refer to the -N and -rec parameters in the manual.
+
+
+REFERENCE
+=========
+Liang L., Zollner S., Abecasis G.R. (2007) GENOME: a rapid coalescent-based whole genome simulator.
+Bioinformatics 23(12):1565-7
+
+
+
+Comments and suggestions are welcome!
+
+Liming Liang
+lianglim@umich.edu
--- /dev/null
+Rec Pos
+0.0001 1
+0.001 3
+0.5 5
+0.7 6
+0.9 7
--- /dev/null
+Rec Freq
+0.0001 9
+0.001 1
--- /dev/null
+////////////////////////////////////////////////////////////////////// \r
+// stochastic.cpp \r
+//////////////////////////////////////////////////////////////////////////////\r
+// COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions\r
+// are met:\r
+//\r
+// 1. Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// 2. Redistributions in binary form must reproduce the above copyright\r
+// notice, this list of conditions and the following disclaimer in the\r
+// documentation and/or other materials provided with the distribution.\r
+//\r
+// 3. The names of its contributors may not be used to endorse or promote\r
+// products derived from this software without specific prior written\r
+// permission.\r
+//\r
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+\r
+#include "Random.h"\r
+#include <math.h>\r
+#include <iostream>\r
+\r
+using namespace std;\r
+\r
+#define Pi 3.1415926535897932\r
+\r
+double lngamma(double z)\r
+{\r
+ double result,sum;\r
+ static double c[7]={1.000000000190015, 76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};\r
+ \r
+ sum=c[0];\r
+ for(int i=1;i<=6;i++)sum += c[i]/(z+i);\r
+ \r
+ result = (z+0.5)*log(z+5.5)-(z+5.5)+log(2.5066282746310005*sum/z);\r
+ \r
+ return result;\r
+}\r
+\r
+\r
+\r
+int poissonInt(double lambda) \r
+{\r
+ static double mean=-1, waitTime,s,logmean,c;\r
+ double count, eventTime,ratio,y;\r
+ \r
+ static double maxInt=pow(2.0,(double)(8*sizeof(int)-1))-1;\r
+ \r
+ if(lambda <0){\r
+ cout<<"The mean of Poisson should be >=0, input="<<lambda<<endl;\r
+ exit(1); \r
+ }\r
+ else if(lambda < 12){\r
+ if(lambda != mean){\r
+ mean = lambda;\r
+ waitTime = exp(-lambda); \r
+ }\r
+ \r
+ count = 0;\r
+ eventTime = globalRandom.Next();\r
+ \r
+ while(eventTime > waitTime){\r
+ count++;\r
+ eventTime *= globalRandom.Next(); \r
+ }\r
+ }\r
+ else{\r
+ if(lambda != mean){\r
+ mean = lambda;\r
+ s=sqrt(2*lambda);\r
+ logmean=log(lambda);\r
+ c=lambda*log(lambda)-lngamma(lambda+1);\r
+ }\r
+ \r
+ do{\r
+ do{\r
+ y=tan(Pi*globalRandom.Next());\r
+ count = floor(s*y+lambda);\r
+ }while(count<0);\r
+ \r
+ ratio = 0.9*(1+y*y)*exp(count*logmean-lngamma(count+1)-c);\r
+ \r
+ }while(globalRandom.Next()>ratio);\r
+ \r
+ }\r
+ \r
+ if(count>maxInt)count=maxInt;\r
+ \r
+ return (int)count; \r
+}\r
+\r
+\r
+\r
+\r
--- /dev/null
+////////////////////////////////////////////////////////////////////// \r
+// stochastic.h \r
+//////////////////////////////////////////////////////////////////////////////\r
+// COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions\r
+// are met:\r
+//\r
+// 1. Redistributions of source code must retain the above copyright\r
+// notice, this list of conditions and the following disclaimer.\r
+//\r
+// 2. Redistributions in binary form must reproduce the above copyright\r
+// notice, this list of conditions and the following disclaimer in the\r
+// documentation and/or other materials provided with the distribution.\r
+//\r
+// 3. The names of its contributors may not be used to endorse or promote\r
+// products derived from this software without specific prior written\r
+// permission.\r
+//\r
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+int poissonInt(double lambda);\r
+\r
+\r
+ \r