]> git.donarmstrong.com Git - genome.git/commitdiff
import initial version of genome
authorDon Armstrong <don@donarmstrong.com>
Fri, 7 Feb 2014 23:18:51 +0000 (15:18 -0800)
committerDon Armstrong <don@donarmstrong.com>
Fri, 7 Feb 2014 23:18:51 +0000 (15:18 -0800)
18 files changed:
Error.cpp [new file with mode: 0644]
Error.h [new file with mode: 0644]
LICENSE.GENOME [new file with mode: 0644]
LICENSE.twister [new file with mode: 0644]
Main.cpp [new file with mode: 0644]
MathConstant.h [new file with mode: 0644]
Random.cpp [new file with mode: 0644]
Random.h [new file with mode: 0644]
genome.cpp [new file with mode: 0644]
genome.h [new file with mode: 0644]
makefile [new file with mode: 0644]
makefile-32bitsys [new file with mode: 0644]
population.txt [new file with mode: 0644]
readme.txt [new file with mode: 0644]
recombination-pos.txt [new file with mode: 0644]
recombination.txt [new file with mode: 0644]
stochastic.cpp [new file with mode: 0644]
stochastic.h [new file with mode: 0644]

diff --git a/Error.cpp b/Error.cpp
new file mode 100644 (file)
index 0000000..5701232
--- /dev/null
+++ b/Error.cpp
@@ -0,0 +1,88 @@
+////////////////////////////////////////////////////////////////////// \r
+// Error.cpp \r
+//////////////////////////////////////////////////////////////////////////////\r
+//              COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+//   Redistribution and use in source and binary forms, with or without\r
+//   modification, are permitted provided that the following conditions\r
+//   are met:\r
+//\r
+//     1. Redistributions of source code must retain the above copyright\r
+//        notice, this list of conditions and the following disclaimer.\r
+//\r
+//     2. Redistributions in binary form must reproduce the above copyright\r
+//        notice, this list of conditions and the following disclaimer in the\r
+//        documentation and/or other materials provided with the distribution.\r
+//\r
+//     3. The names of its contributors may not be used to endorse or promote\r
+//        products derived from this software without specific prior written\r
+//        permission.\r
+//\r
+//   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+//   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+//   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+//   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+//   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+//   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+//   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+//   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+//   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+//   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+//   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+#include "Error.h"\r
+\r
+#include "stdlib.h"\r
+#include "stdarg.h"\r
+#include "stdio.h"\r
+\r
+// Declare a dummy class to ensure that compilers recognize this as C++ code\r
+class String;\r
+\r
+void error ( const char * msg, ... )\r
+   {\r
+   va_list  ap;\r
+\r
+   va_start(ap, msg);\r
+\r
+   printf("\nFATAL ERROR - \n");\r
+   vprintf(msg, ap);\r
+   printf("\n\n");\r
+\r
+   va_end(ap);\r
+\r
+   exit(EXIT_FAILURE);\r
+   }\r
+\r
+void warning ( const char * msg, ... )\r
+   {\r
+   va_list  ap;\r
+\r
+   va_start(ap, msg);\r
+\r
+   printf("\n\aWARNING - \n");\r
+   vprintf(msg, ap);\r
+   printf("\n");\r
+\r
+   va_end(ap);\r
+   }\r
+\r
+void numerror ( const char * msg , ... )\r
+   {\r
+   va_list  ap;\r
+\r
+   va_start(ap, msg);\r
+\r
+   printf("\nFATAL NUMERIC ERROR - ");\r
+   vprintf(msg, ap);\r
+   printf("\n\n");\r
+\r
+   va_end(ap);\r
+\r
+   exit(EXIT_FAILURE);\r
+   }\r
diff --git a/Error.h b/Error.h
new file mode 100644 (file)
index 0000000..7235025
--- /dev/null
+++ b/Error.h
@@ -0,0 +1,55 @@
+////////////////////////////////////////////////////////////////////// \r
+// Error.h\r
+//////////////////////////////////////////////////////////////////////////////\r
+//              COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+//   Redistribution and use in source and binary forms, with or without\r
+//   modification, are permitted provided that the following conditions\r
+//   are met:\r
+//\r
+//     1. Redistributions of source code must retain the above copyright\r
+//        notice, this list of conditions and the following disclaimer.\r
+//\r
+//     2. Redistributions in binary form must reproduce the above copyright\r
+//        notice, this list of conditions and the following disclaimer in the\r
+//        documentation and/or other materials provided with the distribution.\r
+//\r
+//     3. The names of its contributors may not be used to endorse or promote\r
+//        products derived from this software without specific prior written\r
+//        permission.\r
+//\r
+//   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+//   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+//   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+//   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+//   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+//   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+//   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+//   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+//   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+//   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+//   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#ifndef _ERROR_H_\r
+#define _ERROR_H_\r
+\r
+// #ifdef __cplusplus\r
+// extern "C" {\r
+// #endif\r
+\r
+void error(const char * msg, ...);\r
+void warning(const char * msg, ...);\r
+void numerror(const char * msg, ...);\r
+\r
+// #ifdef __cplusplus\r
+//   };\r
+// #endif\r
+\r
+\r
+#endif\r
diff --git a/LICENSE.GENOME b/LICENSE.GENOME
new file mode 100644 (file)
index 0000000..69e9ac4
--- /dev/null
@@ -0,0 +1,36 @@
+\r
+COPYRIGHT NOTICE FOR GENOME\r
+=====================================\r
+\r
+   GENOME coded by Liming Liang and Goncalo Abecasis.   \r
+\r
+   Copyright (C) 2006 - 2007, Liming Liang and Goncalo Abecasis,\r
+   All rights reserved.                          \r
+\r
+   Redistribution and use in source and binary forms, with or without\r
+   modification, are permitted provided that the following conditions\r
+   are met:\r
+\r
+     1. Redistributions of source code must retain the above copyright\r
+        notice, this list of conditions and the following disclaimer.\r
+\r
+     2. Redistributions in binary form must reproduce the above copyright\r
+        notice, this list of conditions and the following disclaimer in the\r
+        documentation and/or other materials provided with the distribution.\r
+\r
+     3. The names of its contributors may not be used to endorse or promote \r
+        products derived from this software without specific prior written \r
+        permission.\r
+\r
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+\r
diff --git a/LICENSE.twister b/LICENSE.twister
new file mode 100644 (file)
index 0000000..2d5b11b
--- /dev/null
@@ -0,0 +1,37 @@
+Mersenne twister code is included in the file Random.cpp
+
+COPYRIGHT NOTICE FOR MERSENNE TWISTER
+=====================================
+
+   Mersenne twister coded by Takuji Nishimura and Makoto Matsumoto.   
+
+   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+   All rights reserved.                          
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+     1. Redistributions of source code must retain the above copyright
+        notice, this list of conditions and the following disclaimer.
+
+     2. Redistributions in binary form must reproduce the above copyright
+        notice, this list of conditions and the following disclaimer in the
+        documentation and/or other materials provided with the distribution.
+
+     3. The names of its contributors may not be used to endorse or promote 
+        products derived from this software without specific prior written 
+        permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/Main.cpp b/Main.cpp
new file mode 100644 (file)
index 0000000..7beabb9
--- /dev/null
+++ b/Main.cpp
@@ -0,0 +1,211 @@
+////////////////////////////////////////////////////////////////////// \r
+// Main.cpp \r
+//////////////////////////////////////////////////////////////////////////////\r
+//              COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+//   Redistribution and use in source and binary forms, with or without\r
+//   modification, are permitted provided that the following conditions\r
+//   are met:\r
+//\r
+//     1. Redistributions of source code must retain the above copyright\r
+//        notice, this list of conditions and the following disclaimer.\r
+//\r
+//     2. Redistributions in binary form must reproduce the above copyright\r
+//        notice, this list of conditions and the following disclaimer in the\r
+//        documentation and/or other materials provided with the distribution.\r
+//\r
+//     3. The names of its contributors may not be used to endorse or promote\r
+//        products derived from this software without specific prior written\r
+//        permission.\r
+//\r
+//   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+//   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+//   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+//   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+//   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+//   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+//   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+//   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+//   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+//   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+//   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#include <iostream>\r
+#include <math.h>\r
+#include <vector> \r
+#include <time.h>\r
+#include "genome.h"\r
+#include "Random.h"\r
+#include <stdio.h> \r
+#include <iomanip>\r
+    \r
+using namespace std;\r
+\r
+\r
+int main(int argc, char ** argv)\r
+   {\r
+       string popSize("10000"), rec("0.0001");\r
+       \r
+       int numPieces=100, pieceLen=10000, numIndepRegion=1, SNP=-1;\r
+       int nSubPOP=2; \r
+       vector<int> nSubSample(nSubPOP);\r
+       for(int i=0;i<nSubPOP;i++)nSubSample[i]=10;\r
+       \r
+       double mut=1e-8, mig=2.5e-4;\r
+       double maf=0.0,prop=1.0;\r
+       \r
+       vector< vector<bool> > data;\r
+\r
+       long seed=-1;\r
+       \r
+       int drawtree=0;\r
+\r
+       if(argc==1){\r
+               cout << endl  << "GENOME-0.2: Whole Genome Coalescent Simulator. (2009.2) Liming Liang, Goncalo Abecasis"  << endl << endl;\r
+               cout << "     Parameters and default values:" << endl;\r
+               cout << "     -pop     " << "number of subpopulations and size of subsamples [ " <<nSubPOP<<" "; \r
+                                                                       for(int i=0;i<nSubPOP;i++)cout<<nSubSample[i]<<" ";cout<<"]"<< endl;\r
+               cout << "     -N       " << "effective size of each subpopulation or the filename for population profile [" <<popSize.c_str()<<"]"<< endl;\r
+               cout << "     -c       " << "number of independent regions (chromosomes) to simulate [" <<numIndepRegion<<"]"<< endl;\r
+               cout << "     -pieces  " << "number of fragments per independent region [" <<numPieces<<"]"<< endl;\r
+               cout << "     -len     " << "length in base of each fragment [" <<pieceLen<<"]"<< endl;\r
+               cout << "     -s       " << "fixed number of SNPs per independent region (chromosome), -1 = number of SNPs follows Poisson distribution [" <<SNP<<"]"<< endl;\r
+               cout << "     -rec     " << "recombination rate bewteen consecutive fragments"<<" or the filename for recombination rate distribution ["<<rec.c_str()<<"]"<< endl;\r
+               cout << "     -mut     " << "mutation rate per generation per base pair [" <<mut<<"]"<< endl;\r
+               cout << "     -mig     " << "migration rate per generation per individual [" <<mig<<"]"<< endl;\r
+               cout << "     -seed    " << "random seed, -1 = use time as the seed [" <<seed<<"]"<< endl;\r
+               cout << "     -tree    " << "1=draw the genealogy trees, 0=do not output [" <<drawtree<<"]"<< endl;\r
+               cout << "     -maf     " << "Output SNPs with minor allele frequency greater than [" <<maf<<"]"<< endl;\r
+               cout << "     -prop    " << "To keep this proportion of SNPs with MAF < the value of -maf parameter [" <<prop<<"]"<< endl;\r
+               exit(0);\r
+       }\r
\r
+\r
+       int k=0;\r
+    for(int i=1;i<argc;i++){\r
+        if(strcmp(argv[i],"-pop")==0){ \r
+               nSubPOP=atoi(argv[++i]); k++;\r
+               nSubSample.clear();\r
+               nSubSample.resize(nSubPOP);\r
+               for(int iter=0;iter<nSubPOP;iter++)nSubSample[iter]=atoi(argv[++i]);\r
+           }\r
+           else if(strcmp(argv[i],"-N")==0){ popSize.assign(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-c")==0){ numIndepRegion=atoi(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-pieces")==0){ numPieces=atoi(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-len")==0){ pieceLen=atoi(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-s")==0){ SNP=atoi(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-rec")==0){ rec.assign(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-mut")==0){ mut=atof(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-mig")==0){ mig=atof(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-seed")==0){ seed=atol(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-tree")==0){ drawtree=atoi(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-maf")==0){ maf=atof(argv[++i]); k++;}\r
+           else if(strcmp(argv[i],"-prop")==0){ prop=atof(argv[++i]); k++;}\r
+    }\r
+    \r
+    if(k!=(argc-1)/2 && k!=(argc-1-nSubPOP)/2){\r
+        cout << "Parameters do not match! " << argc<< " " << k << endl; \r
+        exit(1);\r
+    } \r
+\r
+    cout << endl \r
+           << "GENOME-0.2: Whole Genome Coalescent Simulator. (2009.2) Liming Liang, Goncalo Abecasis"  << endl << endl;\r
+       cout << "     Parameters and effective values:" << endl;\r
+       cout << "     -pop     " << "number of subpopulations and size of subsamples [ " <<nSubPOP<<" "; \r
+                                                                       for(int i=0;i<nSubPOP;i++)cout<<nSubSample[i]<<" ";cout<<"]"<< endl;\r
+               cout << "     -N       " << "effective size of each subpopulation or the filename for population profile [" <<popSize.c_str()<<"]"<< endl;\r
+               cout << "     -c       " << "number of independent regions (chromosomes) to simulate [" <<numIndepRegion<<"]"<< endl;\r
+               cout << "     -pieces  " << "number of fragments per independent region [" <<numPieces<<"]"<< endl;\r
+               cout << "     -len     " << "length in base of each fragment [" <<pieceLen<<"]"<< endl;\r
+               cout << "     -s       " << "fixed number of SNPs per independent region (chromosome), -1 = number of SNPs follows Poisson distribution [" <<SNP<<"]"<< endl;\r
+               cout << "     -rec     " << "recombination rate bewteen consecutive fragments or the filename for recombination rate distribution ["<<rec.c_str()<<"]"<< endl;\r
+               cout << "     -mut     " << "mutation rate per generation per base pair [" <<mut<<"]"<< endl;\r
+               cout << "     -mig     " << "migration rate per generation per individual [" <<mig<<"]"<< endl;\r
+               cout << "     -seed    " << "random seed, -1 = use time as the seed [" <<seed<<"]"<< endl;\r
+               cout << "     -tree    " << "1=draw the genealogy trees, 0=do not output [" <<drawtree<<"]"<< endl;\r
+               cout << "     -maf     " << "Output SNPs with minor allele frequency greater than [" <<maf<<"]"<< endl;\r
+               cout << "     -prop    " << "To keep this proportion of SNPs with MAF < the value of -maf parameter [" <<prop<<"]"<< endl<<endl;\r
+\r
+       if(atoi(popSize.c_str())>0 && atof(rec.c_str())>0){     \r
+               printf("     Scaled recombination rate = %.4e\n", 2 * atoi(popSize.c_str()) * (numPieces - 1) * atof(rec.c_str()));\r
+               printf("     Scaled migration rate = %.4e\n", 2 * atoi(popSize.c_str()) * mig);\r
+               printf("     Scaled mutation rate = %.4e\n\n", 2 * atoi(popSize.c_str()) * mut * numPieces * pieceLen);\r
+       }\r
+       \r
+       genome(popSize, nSubPOP, nSubSample, numPieces, pieceLen, numIndepRegion, SNP, rec, mut, mig, data, seed,drawtree);\r
+       \r
+// output format for haploview\r
+\r
+//     for(int i=0;i<data.size();i++){\r
+//             printf("FAMILY1 MEMBER%d ",i/2+1);\r
+//             for(int j=0;j<data[i].size();j++){\r
+//                     cout<<data[i][j]+1<<" ";\r
+//             }\r
+//             cout<<endl;\r
+//     }\r
+\r
+\r
+// 0 = ancestral state, 1 = mutated state\r
+\r
+       cout<<"Total number of SNPs = "<<data[0].size()<<endl;\r
+       cout<<"Samples:"<<endl;\r
+       \r
+       if(maf>0.0 && prop < 1.0){ // filter out the SNPs with MAF < maf \r
+               \r
+               cout<<"Rare SNPs filtered according to parameters."<<endl;\r
+               \r
+               int n=0;\r
+               for(int k=0;k<nSubPOP;k++)n+=nSubSample[k];\r
+               \r
+               int m=(int)(data[0].size());\r
+               vector<bool> keep(m,true);\r
+                       \r
+               float p;\r
+               for(int i=0;i<m;i++){\r
+                       p=0.0;\r
+                       for(int j=0;j<n;j++){\r
+                               p += data[j][i];        \r
+                       }\r
+                       p /= n;\r
+                       \r
+                       if( (p<maf || (1-p)<maf) && globalRandom.Next()>prop)keep[i]=0;\r
+               }\r
+               \r
+\r
+               int index=0;\r
+               for(int k=0;k<nSubPOP;k++){\r
+                       for(int i=0;i<nSubSample[k];i++){\r
+                               cout<<"POP"<<k+1<<": ";\r
+                               for(unsigned int j=0;j<data[index].size();j++){\r
+                                       if(keep[j])cout<<data[index][j];\r
+                               }\r
+                               cout<<endl;\r
+                               index++;\r
+                       }       \r
+               }\r
+       \r
+       }\r
+       else{\r
+               int index=0;\r
+               for(int k=0;k<nSubPOP;k++){\r
+                       for(int i=0;i<nSubSample[k];i++){\r
+                               cout<<"POP"<<k+1<<": ";\r
+                               for(unsigned int j=0;j<data[index].size();j++){\r
+                                       cout<<data[index][j];\r
+                               }\r
+                               cout<<endl;\r
+                               index++;\r
+                       }       \r
+               }\r
+       }\r
+\r
+  \r
+}\r
+\r
+\r
diff --git a/MathConstant.h b/MathConstant.h
new file mode 100644 (file)
index 0000000..d471dac
--- /dev/null
@@ -0,0 +1,73 @@
+////////////////////////////////////////////////////////////////////// \r
+// MathConstant.h \r
+//////////////////////////////////////////////////////////////////////////////\r
+//              COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+//   Redistribution and use in source and binary forms, with or without\r
+//   modification, are permitted provided that the following conditions\r
+//   are met:\r
+//\r
+//     1. Redistributions of source code must retain the above copyright\r
+//        notice, this list of conditions and the following disclaimer.\r
+//\r
+//     2. Redistributions in binary form must reproduce the above copyright\r
+//        notice, this list of conditions and the following disclaimer in the\r
+//        documentation and/or other materials provided with the distribution.\r
+//\r
+//     3. The names of its contributors may not be used to endorse or promote\r
+//        products derived from this software without specific prior written\r
+//        permission.\r
+//\r
+//   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+//   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+//   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+//   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+//   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+//   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+//   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+//   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+//   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+//   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+//   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+\r
+#ifndef __MATHCONSTANT_H__\r
+#define __MATHCONSTANT_H__\r
+\r
+#ifdef  _MSC_VER\r
+#define _USE_MATH_DEFINES \r
+#endif\r
+\r
+#include "math.h"\r
+#include "stdlib.h"\r
+\r
+// Constants for numerical routines\r
+//\r
+\r
+#define TINY    1.0e-30        // A small number\r
+#define ITMAX   200            // Maximum number of iterations\r
+#define EPS     3.0e-7         // Relative accuracy\r
+#define ZEPS    3.0e-10        // Precision around zero\r
+#define FPMIN   1.0e-30        // Number near the smallest representable number\r
+#define FPMAX   1.0e+100       // Number near the largest representable number\r
+#define TOL     1.0e-6         // Zero SVD values below this\r
+#define GOLD    0.61803399     // Golden ratio\r
+#define CGOLD   0.38196601     // Complement of golden ratio\r
+\r
+inline double square(double a)         { return a * a; }\r
+inline double sign(double a, double b) { return b >= 0 ? fabs(a) : -fabs(a); }\r
+inline double min(double a, double b)  { return a < b ? a : b; }\r
+inline double max(double a, double b)  { return a > b ? a : b; }\r
+\r
+inline int square(int a)      { return a * a; }\r
+inline int sign(int a, int b) { return b >= 0 ? abs(a) : -abs(a); }\r
+inline int min(int a, int b)  { return a < b ? a : b; }\r
+inline int max(int a, int b)  { return a > b ? a : b; }\r
+\r
+#endif\r
diff --git a/Random.cpp b/Random.cpp
new file mode 100644 (file)
index 0000000..a181e97
--- /dev/null
@@ -0,0 +1,387 @@
+
+//////////////////////////////////////////////////////////////////////////////
+// This file includes code derived from the original Mersenne Twister Code
+// by Makoto Matsumoto and Takuji Nishimura
+// and is subject to their original copyright notice copied below:
+//////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////
+// COPYRIGHT NOTICE FOR MERSENNE TWISTER CODE
+// Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. The names of its contributors may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "Random.h"
+#include "MathConstant.h"
+#include "Error.h"
+
+#include <math.h>
+
+//Constants used internally by Mersenne random number generator
+#define MERSENNE_N 624
+#define MERSENNE_M 397
+
+// constant vector a
+#define MATRIX_A 0x9908b0dfUL
+
+// most significant w-r bits
+#define UPPER_MASK 0x80000000UL
+
+// least significant r bits
+#define LOWER_MASK 0x7fffffffUL
+
+
+// Constants used internally by Park-Miller random generator
+#define IA 16807
+#define IM 2147483647
+#define AM (1.0 / IM)
+#define IQ 127773
+#define IR 2836
+#define NTAB 32
+#define NDIV (1+(IM-1)/NTAB)
+#define RNMX (1.0-EPS)
+
+Random::Random(long s)
+   {
+#ifndef __NO_MERSENNE
+   mt = new unsigned long [MERSENNE_N];
+   mti = MERSENNE_N + 1;
+   mersenneMult = 1.0/4294967296.0;
+#else
+   shuffler = new long [NTAB];
+#endif
+   Reset(s);
+   }
+
+Random::~Random()
+   {
+#ifndef __NO_MERSENNE
+   delete [] mt;
+#else
+   delete [] shuffler;
+#endif
+   }
+
+void Random::Reset(long s)
+   {
+   normSaved = 0;
+
+#ifndef __NO_MERSENNE
+   InitMersenne(s);
+#else
+   // 'Continuous' Random Generator
+   if ((seed = s) < 1)
+      seed = s == 0 ? 1 : -s;  // seed == 0 would be disastrous
+
+   for (int j=NTAB+7; j>=0; j--)  // Warm up and set shuffle table
+      {
+      long k = seed / IQ;
+      seed = IA * (seed - k * IQ) - IR * k;
+      if (seed < 0) seed += IM;
+      if (j < NTAB) shuffler[j] = seed;
+      }
+   last=shuffler[0];
+#endif
+   }
+
+// initializes mt[MERSENNE_N] with a seed
+void Random::InitMersenne(unsigned long s)
+   {
+   mt[0]= s & 0xffffffffUL;
+   for (mti = 1; mti < MERSENNE_N; mti++)
+      {
+      mt[mti] = (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
+      /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+      /* In the previous versions, MSBs of the seed affect */
+      /* only MSBs of the array mt[].  */
+      /* 2002/01/09 modified by Makoto Matsumoto */
+
+      mt[mti] &= 0xffffffffUL;
+      }
+   }
+
+int Random::Binary()
+   {
+   return Next() > 0.5 ? 1 : 0;
+   }
+
+#ifndef __NO_MERSENNE
+
+double Random::Next()
+   {
+   unsigned long y;
+
+   // mag01[x] = x * MATRIX_A for x=0,1
+   static unsigned long mag01[2]={0x0UL, MATRIX_A};
+
+   if (mti >= MERSENNE_N)
+      {
+      /* generate MERSENNE_N words at one time */
+      int kk;
+
+      // If InitMersenne() has not been called, a default initial seed is used
+      if (mti == MERSENNE_N+1)
+         InitMersenne(5489UL);
+
+      for (kk=0; kk < MERSENNE_N-MERSENNE_M; kk++)
+         {
+         y = (mt[kk] & UPPER_MASK) | (mt[kk+1] & LOWER_MASK);
+         mt[kk] = mt[kk+MERSENNE_M] ^ (y >> 1) ^ mag01[y & 0x1UL];
+         }
+
+      for (; kk < MERSENNE_N-1; kk++)
+         {
+         y = (mt[kk] & UPPER_MASK) | (mt[kk+1] & LOWER_MASK);
+         mt[kk] = mt[kk+(MERSENNE_M - MERSENNE_N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+         }
+
+      y = (mt[MERSENNE_N-1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+      mt[MERSENNE_N-1] = mt[MERSENNE_M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+      mti = 0;
+      }
+
+   y = mt[mti++];
+
+   // Tempering
+   y ^= (y >> 11);
+   y ^= (y << 7) & 0x9d2c5680UL;
+   y ^= (y << 15) & 0xefc60000UL;
+   y ^= (y >> 18);
+
+   return (mersenneMult * ((double) y + 0.5));
+   }
+
+// Generates a random number on [0,0xffffffff]-interval
+
+unsigned long Random::NextInt()
+   {
+   unsigned long y;
+
+   // mag01[x] = x * MATRIX_A for x=0,1
+   static unsigned long mag01[2]={0x0UL, MATRIX_A};
+
+   if (mti >= MERSENNE_N)
+      {
+      /* generate MERSENNE_N words at one time */
+      int kk;
+
+      // If InitMersenne() has not been called, a default initial seed is used
+      if (mti == MERSENNE_N + 1)
+         InitMersenne(5489UL);
+
+      for (kk= 0; kk < MERSENNE_N - MERSENNE_M; kk++)
+         {
+         y = (mt[kk] & UPPER_MASK) | (mt[kk+1] & LOWER_MASK);
+         mt[kk] = mt[kk+MERSENNE_M] ^ (y >> 1) ^ mag01[y & 0x1UL];
+         }
+
+      for (; kk< MERSENNE_N-1; kk++)
+         {
+         y = (mt[kk] & UPPER_MASK) | (mt[kk+1] & LOWER_MASK);
+         mt[kk] = mt[kk+(MERSENNE_M - MERSENNE_N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+         }
+
+      y = (mt[MERSENNE_N-1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+      mt[MERSENNE_N-1] = mt[MERSENNE_M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+      mti = 0;
+      }
+
+   y = mt[mti++];
+
+   // Tempering
+   y ^= (y >> 11);
+   y ^= (y << 7) & 0x9d2c5680UL;
+   y ^= (y << 15) & 0xefc60000UL;
+   y ^= (y >> 18);
+
+   return y;
+   }
+
+#else
+
+double Random::Next()
+   {
+   // Compute seed = (IA * seed) % IM without overflows
+   // by Schrage's method
+   long k = seed / IQ;
+   seed = IA * (seed - k * IQ) - IR * k;
+   if (seed < 0) seed += IM;
+
+   // Map to 0..NTAB-1
+   int j = last/NDIV;
+
+   // Output value is shuffler[j], which is in turn replaced by seed
+   last = shuffler[j];
+   shuffler[j] = seed;
+
+   // Map to 0.0 .. 1.0 excluding endpoints
+   double temp = AM * last;
+   if (temp > RNMX) return RNMX;
+   return temp;
+   }
+
+unsigned long Random::NextInt()
+   {
+   // Compute seed = (IA * seed) % IM without overflows
+   // by Schrage's method
+   long k = seed / IQ;
+   seed = IA * (seed - k * IQ) - IR * k;
+   if (seed < 0) seed += IM;
+
+   // Map to 0..NTAB-1
+   int j = last/NDIV;
+
+   // Output value is shuffler[j], which is in turn replaced by seed
+   last = shuffler[j];
+   shuffler[j] = seed;
+
+   return last;
+   }
+
+#endif
+
+double Random::Normal()
+   {
+   double v1, v2, fac, rsq;
+
+   if (!normSaved)  // Do we need new numbers?
+      {
+      do {
+      v1 = 2.0 * Next() - 1.0;  // Pick two coordinates from
+      v2 = 2.0 * Next() - 1.0;  // -1 to +1 and check if they
+      rsq = v1*v1 + v2*v2;  // are in unit circle...
+      } while (rsq >= 1.0 || rsq == 0.0);
+
+      fac = sqrt(-2.0 * log(rsq)/rsq);  // Apply the Box-Muller
+      normStore = v1 * fac;  // transformation and save
+      normSaved = 1;  // one deviate for next time
+      return v2 * fac;
+      }
+   else
+      {
+      normSaved = 0;
+      return normStore;
+      }
+   }
+
+void Random::Choose(int * array, int n, int k)
+   {
+   int choices = 1, others = 0;
+
+   if (k > n / 2)
+      {
+      choices = 0;
+      others = 1;
+      k = n - k;
+      }
+
+   for (int i = 0; i < n; i++)
+      array[i] = others;
+
+   while (k > 0)
+      {
+      int i = NextInt() % n;
+
+      if (array[i] == choices) continue;
+
+      array[i] = choices;
+      k--;
+      }
+   }
+
+void Random::Choose(int * array, float * weights, int n, int k)
+   {
+   int choices = 1, others = 0;
+
+   if (k > n / 2)
+      {
+      choices = 0;
+      others = 1;
+      k = n - k;
+      }
+
+   // First calculate cumulative sums of weights ...
+   float * cumulative = new float [n + 1];
+
+   cumulative[0] = 0;
+   for (int i = 1; i <= n; i++)
+      cumulative[i] = cumulative[i - 1] + weights[i - 1];
+
+   float & sum = cumulative[n], reject = 0.0;
+
+   for (int i = 0; i < n; i++)
+      array[i] = others;
+
+   while (k > 0)
+      {
+      float weight = (float)(Next() * sum);
+
+      int hi = n, lo = 0, i = 0;
+
+      while (hi >= lo)
+         {
+         i = (hi + lo) / 2;
+
+         if (cumulative[i + 1] <= weight)
+            lo = i + 1;
+         else if (cumulative[i] >= weight)
+            hi = i - 1;
+         else break;
+         }
+
+      if (array[i] == choices) continue;
+
+      array[i] = choices;
+      reject += weights[i];
+
+      // After selecting a substantial number of elements, update the cumulative
+      // distribution -- to ensure that at least half of our samples produce a hit
+      if (reject > sum * 0.50)
+         {
+         cumulative[0] = 0;
+         for (int i = 1; i <= n; i++)
+            if (array[i] != choices)
+               cumulative[i] = cumulative[i - 1] + weights[i - 1];
+            else
+               cumulative[i] = cumulative[i - 1];
+
+         reject = 0.0;
+         }
+
+      k--;
+      }
+   }
+
+Random globalRandom;
+
+
diff --git a/Random.h b/Random.h
new file mode 100644 (file)
index 0000000..cc6900f
--- /dev/null
+++ b/Random.h
@@ -0,0 +1,115 @@
+
+//////////////////////////////////////////////////////////////////////////////
+// This file includes code derived from the original Mersenne Twister Code
+// by Makoto Matsumoto and Takuji Nishimura
+// and is subject to their original copyright notice copied below:
+//////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////
+//              COPYRIGHT NOTICE FOR MERSENNE TWISTER CODE
+//
+// Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+// All rights reserved.
+//
+//   Redistribution and use in source and binary forms, with or without
+//   modification, are permitted provided that the following conditions
+//   are met:
+//
+//     1. Redistributions of source code must retain the above copyright
+//        notice, this list of conditions and the following disclaimer.
+//
+//     2. Redistributions in binary form must reproduce the above copyright
+//        notice, this list of conditions and the following disclaimer in the
+//        documentation and/or other materials provided with the distribution.
+//
+//     3. The names of its contributors may not be used to endorse or promote
+//        products derived from this software without specific prior written
+//        permission.
+//
+//   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+//   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+//   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+//   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+//   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+//   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+//   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+//   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+//   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+//   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef __RANDOM_H__
+#define __RANDOM_H__
+
+// Define a quick and dirty generator
+#define RANDMUL 1664525L
+#define RANDADD 1013904223L
+
+#define RAND(seed) ((seed = seed * RANDMUL + RANDADD) & 0xFFFFFFFF)
+
+class Random
+// Implements the Mersenne Twister as default random number generator.
+// Compilation flag __NO_MERSENNE sets default generator to
+// a minimal Park-Miller with Bays-Durham shuffle and added safe guards.
+   {
+   protected:
+      // values for "minimal random values"
+      long  seed;
+      long  last;
+      long  * shuffler;
+
+      // and for normal deviates
+      int      normSaved;
+      double   normStore;
+
+      double mersenneMult;
+
+      // Array for Mersenne state vector
+      unsigned long * mt;
+
+      // Used to signal that Mersenne state vector is not initialized
+      int mti;
+
+
+   public:
+
+      Random(long s = 0x7654321);
+      ~Random();
+
+      // Next bit in series of 0s and 1s
+      int    Binary();     // Next bit in series of 0s and 1s
+
+      // Next value in series, between 0 and 1
+      double Next();
+
+      // Next integer
+      unsigned long NextInt();
+
+      // Random number form N(0,1)
+      double Normal();
+
+      void   Reset(long s);
+      void   InitMersenne(unsigned long s);
+
+      // Random number between 0 and 1
+      operator double()
+         { return Next(); }
+
+      // Random number between arbitrary bounds
+      double Uniform(double lo = 0.0, double hi = 1.0)
+         {
+         return lo + (hi - lo) * Next();
+         }
+
+      void Choose(int * array, int n, int k);
+      void Choose(int * array, float * weights, int n, int k);
+
+   };
+
+extern Random globalRandom;
+
+#endif
+
diff --git a/genome.cpp b/genome.cpp
new file mode 100644 (file)
index 0000000..f46c661
--- /dev/null
@@ -0,0 +1,1396 @@
+////////////////////////////////////////////////////////////////////// \r
+// genome.cpp\r
+//////////////////////////////////////////////////////////////////////////////\r
+//              COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// Version 0.2\r
+// All rights reserved.\r
+//\r
+//   Redistribution and use in source and binary forms, with or without\r
+//   modification, are permitted provided that the following conditions\r
+//   are met:\r
+//\r
+//     1. Redistributions of source code must retain the above copyright\r
+//        notice, this list of conditions and the following disclaimer.\r
+//\r
+//     2. Redistributions in binary form must reproduce the above copyright\r
+//        notice, this list of conditions and the following disclaimer in the\r
+//        documentation and/or other materials provided with the distribution.\r
+//\r
+//     3. The names of its contributors may not be used to endorse or promote\r
+//        products derived from this software without specific prior written\r
+//        permission.\r
+//\r
+//   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+//   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+//   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+//   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+//   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+//   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+//   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+//   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+//   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+//   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+//   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#define _CRT_SECURE_NO_DEPRECATE \r
+\r
+#include "Random.h"\r
+#include "Error.h"\r
+#include "stochastic.h" \r
+#include <iostream>\r
+#include <fstream>\r
+#include <math.h>\r
+#include <vector> \r
+#include <time.h>\r
+#include <algorithm>\r
+#include <map>\r
+#include <sstream>\r
+#include <stdio.h>\r
+\r
+using namespace std;\r
+\r
+//static bool debug = false;\r
+static bool fixedPOP = true;\r
+static bool noChange = true;\r
+\r
+\r
+static int N = 10000, n = 100, L = 1, poolsize = 1024 * 1024 * 64, length=10000, numChr=1, SNP=-1; // length = number of base per fragment\r
+static int maxN;\r
+static unsigned int currentProfile;\r
+static int nPOP=1;\r
+static vector<int> nSample(nPOP,3);\r
+static double recombination = 1e-8, migration = 0.0, mutation=1e-8;\r
+\r
+static int * genepool, * genetimes, * fragments, * MRCA, * MRCAtimes;\r
+static int *** children, *** parents, * cblocks, * pblocks;\r
+static int * parent_First, * parent_Last, * child_First, * child_Last;\r
+static bool * child_flags, * parent_flags;\r
+static int nextFree = 0, activeSegments = 0, pblockcounter = 0;\r
+static int numBLOCKs;\r
+static int BUFFER=1500;\r
+static int INCREMENT=1000;\r
+\r
+#define BLOCKSIZE   128\r
+#define SWAP(tmp,a,b)  tmp=a; a=b; b=tmp;\r
+\r
+static vector< vector< vector<bool> > > chromosome;\r
+static int * fragmentLength;\r
+static int * branchFragment;\r
+static int * geneIndex;\r
+static int * branchMutation;\r
+\r
+static float * recombVector=NULL;\r
+\r
+struct popProfilesStruct{\r
+       int generation;\r
+       vector<int> popsize;\r
+       vector<int> popStart;\r
+       vector< vector<float> > selectProb;\r
+       vector< vector<int> > popChange;\r
+};\r
+\r
+static vector<struct popProfilesStruct> popProfile;\r
+\r
+void print(){\r
+       cout<<"\n*****************\nParents:\t";\r
+       for(int i=0;i<N;i++){\r
+               if(parent_flags[i]){\r
+                       for(int j=0;j<L;j++) cout<<"["<<((parents[i][j / BLOCKSIZE]==NULL)?999:(parents[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
+                       cout<<"\t";     \r
+               }\r
+       }       \r
+       cout<<endl;\r
+       \r
+       cout<<"Children:\t";\r
+       for(int i=0;i<N;i++){\r
+               if(child_flags[i]){\r
+                       for(int j=0;j<L;j++) cout<<"["<<((children[i][j / BLOCKSIZE]==NULL)?999:(children[i][j / BLOCKSIZE][j % BLOCKSIZE]))<<"]";\r
+                       cout<<"\t";     \r
+               }\r
+       }       \r
+       cout<<endl;\r
+       \r
+       \r
+       \r
+//     cout<<"parent_flag:\t";\r
+//     for(int i=0;i<N;i++){\r
+//             cout<<parent_flags[i]<<"\t";\r
+//     }       \r
+//     cout<<endl;\r
+//     \r
+//     cout<<"children_flag:\t";\r
+//     for(int i=0;i<N;i++){\r
+//             cout<<child_flags[i]<<"\t";\r
+//     }       \r
+//     cout<<endl;\r
+       \r
+       cout<<"genepool:\t";\r
+       for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
+       cout<<endl;\r
+       \r
+       cout<<"genetimes:\t";\r
+       for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
+       cout<<endl;\r
+       \r
+       cout<<"MCRA:\t";\r
+       for(int i=0;i<L;i++)cout<<MRCA[i]<<" ";\r
+       cout<<endl;\r
+       \r
+       cout<<"MCRAtime:\t";\r
+       for(int i=0;i<L;i++)cout<<MRCAtimes[i]<<" ";\r
+       cout<<endl;\r
+       \r
+       cout<<"fragments:\t";\r
+       for(int i=0;i<L;i++)cout<<fragments[i]<<" ";\r
+       cout<<endl;\r
+       \r
+       cout<<"NextFree="<<nextFree<<endl;\r
+}\r
+\r
+int *** AllocateIntPtrMatrix(int rows, int columns){\r
+   int *** result = new int ** [rows];\r
+\r
+   for (int i = 0; i < rows; i++)\r
+      result[i] = new int * [columns];\r
+  \r
+   return result;\r
+}\r
+   \r
+void FreeIntPtrMatrix(int *** & matrix, int rows){\r
+       \r
+   for (int i = 0; i < rows; i++)\r
+      delete [] matrix[i];\r
+\r
+   delete [] matrix;\r
+\r
+   matrix = NULL;\r
+}   \r
+   \r
+void NewGeneration(){\r
+       \r
+   int *** ptrmatrix, * vector;\r
+   bool * boolvector;\r
+   \r
+//    for(int i=0;i<maxN;i++){\r
+//     if(parent_flags[i])printf("numBlock=%d parent=%d first=%d last=%d\n",numBLOCKs,i,parent_First[i],parent_Last[i]);   \r
+//    }\r
+   \r
+   SWAP(ptrmatrix, children, parents);\r
+   SWAP(vector, child_First, parent_First);\r
+   SWAP(vector, child_Last, parent_Last);\r
+   SWAP(boolvector, child_flags, parent_flags);\r
+   SWAP(vector, cblocks, pblocks);\r
+   \r
+   if(!noChange){\r
+          currentProfile++;\r
+          nPOP = (int)(popProfile[currentProfile].popsize.size());\r
+          N = popProfile[currentProfile].popStart.back()+popProfile[currentProfile].popsize.back();\r
+          noChange = true;\r
+   }\r
+\r
+   for (int i = 0; i < maxN; i++)\r
+      parent_flags[i] = 0;\r
+      \r
+//    for (int i = 0; i < maxN; i++)\r
+//        for (int j = 0; j < numBLOCKs; j++)\r
+//            parents[i][j] = NULL;\r
+      \r
+   pblockcounter = 0;\r
+\r
+}\r
+\r
+   \r
+void AllocateMemory(vector< vector<bool> > &return_chromosome){\r
+   \r
+   \r
+//    if(SNP<=0){\r
+//             cout<<"Reserve size for return_chromosome="<<max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP))<<endl;\r
+//    }\r
+//    else{\r
+//             cout<<"Reserve size for return_chromosome="<<SNP*numChr<<endl;\r
+//    }\r
+//    cout<<"Reserve size for working chromosome="<<max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP))<<endl;\r
+   \r
\r
+   return_chromosome.clear();\r
+   return_chromosome.resize(n);\r
+//    for(int i=0;i<n;i++){\r
+//        if(SNP<=0){\r
+//                     //return_chromosome[i].reserve(max(1,40*(int)(mutation*(double)numChr*(double)L*(double)length*(double)maxN/(double)nPOP)));\r
+//        }\r
+//        else{\r
+//                     //return_chromosome[i].reserve(SNP*numChr);\r
+//        }\r
+//     }\r
+   \r
+   chromosome.resize(n);\r
+   for(int i=0;i<n;i++){\r
+               chromosome[i].resize(L);\r
+//             for(int j=0;j<L;j++){\r
+//                if(SNP<=0){\r
+//                        //chromosome[i][j].reserve(max(1,40*(int)(mutation*(double)length*(double)maxN/(double)nPOP)));\r
+//                }\r
+//                else{\r
+//                        //chromosome[i][j].reserve(SNP/L+1);\r
+//                }\r
+//             }\r
+   }\r
+  \r
+   genepool = new int [poolsize];\r
+\r
+   numBLOCKs= (int)ceil((double)L / (double)BLOCKSIZE);\r
\r
+   pblockcounter = 0;\r
+\r
+}\r
+\r
+\r
+void AllocateMutationMemory(){\r
+       \r
+       fragmentLength = new int [L];\r
+       \r
+       branchFragment = new int [poolsize];\r
+       \r
+       geneIndex = new int [poolsize];   \r
+          \r
+}\r
+   \r
+   \r
+   \r
+void FreeMemory(){\r
+       \r
+   delete [] genepool;\r
+   \r
+   if(recombVector!=NULL)delete [] recombVector;\r
+   \r
+}\r
+   \r
+  \r
+void FreeMutationMemory(){\r
+       \r
+   delete [] fragmentLength;\r
+   delete [] geneIndex;\r
+   delete [] branchMutation;\r
+   \r
+       \r
+}\r
+\r
+void FreeCoalescentMemory(){\r
+       \r
+   delete [] MRCA;\r
+   delete [] MRCAtimes;\r
+\r
+   delete [] fragments;\r
+\r
+   FreeIntPtrMatrix(children,maxN);\r
+   \r
+   FreeIntPtrMatrix(parents,maxN);\r
+\r
+   delete [] child_flags;\r
+   delete [] parent_flags;\r
+\r
+   delete [] cblocks;\r
+   delete [] pblocks;\r
+   \r
+   delete [] parent_First;\r
+   delete [] parent_Last;\r
+\r
+   delete [] child_First;\r
+   delete [] child_Last;\r
+       \r
+}\r
+\r
+void reallocBlock(){\r
+       \r
+       pblocks = (int *)realloc(pblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
+       cblocks = (int *)realloc(cblocks, (unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE+INCREMENT*BLOCKSIZE)) );\r
+       BUFFER += INCREMENT;    \r
+       \r
+}\r
+\r
+void setnull(int parent, int position){\r
+       \r
+  if (parent_flags[parent] == 0){\r
+               parent_First[parent]=position / BLOCKSIZE;\r
+               parent_Last[parent]=position / BLOCKSIZE;       \r
+               parents[parent][position / BLOCKSIZE] = NULL;  \r
+  }\r
+  else if(parent_First[parent]>position / BLOCKSIZE){\r
+         for(int i=position / BLOCKSIZE;i<parent_First[parent];i++){\r
+                 parents[parent][i] = NULL;\r
+         }\r
+         parent_First[parent]=position / BLOCKSIZE;\r
+  }\r
+  else if(parent_Last[parent]<position / BLOCKSIZE){\r
+         for(int i=position / BLOCKSIZE;i>parent_Last[parent];i--){\r
+                 parents[parent][i] = NULL;\r
+         }\r
+         parent_Last[parent]=position / BLOCKSIZE;\r
+  }\r
+  \r
+}\r
+\r
+\r
+\r
+void TouchParent(int parent, int position){\r
+\r
+  \r
+  setnull(parent,position);\r
+               \r
+  if (parents[parent][position / BLOCKSIZE] != NULL)return;\r
\r
+\r
+  if (pblockcounter >= n*numBLOCKs+BUFFER){\r
+           \r
+           int * pblocks_old=pblocks;\r
+           int * cblocks_old=cblocks;\r
+           \r
+           reallocBlock();\r
+           \r
+           for(int i=0;i<N;i++)\r
+             if(parent_flags[i])\r
+               for(int j=parent_First[i];j<=parent_Last[i];j++)\r
+                       if(parents[i][j]!=NULL)parents[i][j] += pblocks - pblocks_old;\r
+           \r
+           for(int i=0;i<N;i++)\r
+             if(child_flags[i])\r
+               for(int j=child_First[i];j<=child_Last[i];j++)\r
+                       if(children[i][j]!=NULL)children[i][j] += cblocks - cblocks_old;\r
+               \r
+               printf("pblock and cblock enlarged, BUFFER=%d.!\n",BUFFER);\r
+               \r
+               if(pblocks==NULL || cblocks==NULL)error("Blocks memory reallocation error.\n");\r
+  }\r
+\r
+  parents[parent][position / BLOCKSIZE] = pblocks + BLOCKSIZE * pblockcounter++;\r
+  \r
+  for (int i = 0; i < BLOCKSIZE; i++)\r
+      parents[parent] [position / BLOCKSIZE] [i] = -1;\r
+      \r
+  parent_flags[parent] = 1;\r
+  \r
+}\r
+\r
+void Initialize(){\r
+       \r
+   genetimes = new int [poolsize];\r
+          \r
+   MRCA = new int [L];\r
+   MRCAtimes = new int [L];\r
+\r
+   fragments = new int [L];\r
+\r
+   children = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
+   parents = AllocateIntPtrMatrix(maxN, numBLOCKs);\r
+   \r
+   parent_First = new int [maxN];\r
+   parent_Last = new int [maxN];\r
+\r
+   child_First = new int [maxN];\r
+   child_Last = new int [maxN];\r
+\r
+   cblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));\r
+   pblocks = (int *)malloc((unsigned)(sizeof(int)*(n*(numBLOCKs)*BLOCKSIZE+BUFFER*BLOCKSIZE)));        \r
+  \r
+   child_flags = new bool [maxN];\r
+   parent_flags = new bool [maxN];\r
+   \r
+   \r
+   noChange = true;\r
+       \r
+   nPOP = (int)(popProfile[0].popsize.size());\r
+   \r
+   N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
+   \r
+   currentProfile = 0;\r
+       \r
+   nextFree = 0;\r
+   \r
+   for (int i = 0; i < N; i++)\r
+      child_flags[i] = 0;\r
+     \r
+   int count=0;\r
+   for(int k=0;k<nPOP;k++){    \r
+          for (int i = 0; i < nSample[k]; i++)\r
+             {\r
+                 child_First[popProfile[0].popStart[k]+i]=0;\r
+                 child_Last[popProfile[0].popStart[k]+i]=numBLOCKs-1;\r
+\r
+             child_flags[popProfile[0].popStart[k]+i] = 1;\r
+\r
+             for (int j = 0; j < numBLOCKs; j++)\r
+                       children[popProfile[0].popStart[k]+i][j] = cblocks + (count * L + j * BLOCKSIZE);\r
+               \r
+             for (int j = 0; j < L; j++)\r
+                {\r
+                genepool[nextFree] = 1;\r
+                genetimes[nextFree] = 0;\r
+                children[popProfile[0].popStart[k]+i][j / BLOCKSIZE][j % BLOCKSIZE] = nextFree++;\r
+                }\r
+                \r
+                count++;\r
+             }\r
+   }\r
+\r
+   for (int i = 0; i < L; i++)\r
+      fragments[i] = n;\r
+\r
+   for (int i = 0; i < N; i++)\r
+      parent_flags[i] = 0;\r
+\r
+   activeSegments = L;\r
+   \r
+   //for (int i = 0; i < N; i++)\r
+   //  for (int j = 0; j < numBLOCKs; j++)\r
+   //     parents[i][j] = NULL;\r
+      \r
+   pblockcounter = 0;\r
\r
+}\r
+\r
+void InitializeMutation(vector< vector<bool> > &return_chromosome){\r
+       \r
+//        return_chromosome.clear();\r
+//    return_chromosome.resize(n);\r
+//    for(int i=0;i<n;i++){\r
+//        return_chromosome[i].reserve(max(1,(int)(mutation*(double)numChr*(double)L*(double)length*40*(double)N/(double)nPOP)));\r
+//     }\r
+//    \r
+//    chromosome.resize(n);\r
+//    for(int i=0;i<n;i++){\r
+//             chromosome[i].resize(L);\r
+//             for(int j=0;j<L;j++){\r
+//                chromosome[i][j].reserve(max(1,(int)(mutation*(double)length*40*(double)N/(double)nPOP)));\r
+//             }\r
+//    }\r
+       \r
+//    cout<<"WGCS-- chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
+   \r
+   for(int i=0;i<n;i++){\r
+          for(int j=0;j<L;j++){\r
+                  chromosome[i][j].clear();\r
+          }\r
+   }\r
+   \r
+//    cout<<"WGCS-- after clear() chromosomes capacity="<<chromosome[0][0].capacity()<<" size="<<chromosome[0][0].size()<<endl;\r
+   \r
+   branchMutation = new int [nextFree];\r
+   for(int i=0;i<nextFree;i++)branchMutation[i]=0;\r
+\r
+}\r
+\r
+void PrepareMutation(){\r
+       \r
+   for(int i=0;i<L;i++)fragmentLength[i]=0;\r
+   \r
+   for(int i=0;i<nextFree;i++){\r
+          geneIndex[i] = -1; \r
+          branchFragment[i] = -1;\r
+   }   \r
+       \r
+}\r
+   \r
+int SampleParent(int individual, int previous){\r
+       \r
+       if(fixedPOP){      \r
+          int population = individual/popProfile[0].popsize[0]; \r
+          \r
+          if (previous == -1 && nPOP>1){\r
+             if (globalRandom.Next() < migration){\r
+                     int random = globalRandom.NextInt()%(nPOP-1); \r
+                 if(population<=random)population=random+1;\r
+                 else population=random;\r
+             }\r
+          }\r
+             \r
+          return globalRandom.NextInt() % (popProfile[0].popsize[0]) + popProfile[0].popStart[population];\r
+   }\r
+   else{\r
+          \r
+          if(noChange){\r
+                  int population=0; \r
+          \r
+                  for(int i=0;i<nPOP;i++){\r
+                          if(individual>=popProfile[currentProfile].popStart[i])population=i;\r
+                          else break;\r
+                  }\r
+                  \r
+//                cout<<"No change: ind="<<individual<<" pop="<<population<<endl;\r
+                  \r
+                  if (previous == -1 && nPOP>1){\r
+                     if (globalRandom.Next() < migration){\r
+                             int random = globalRandom.NextInt()%(nPOP-1); \r
+                         if(population<=random)population=random+1;\r
+                         else population=random;\r
+                     }\r
+                  }\r
+                  \r
+//                cout<<"new pop="<<population<<endl;\r
+                     \r
+                  return globalRandom.NextInt() % (popProfile[currentProfile].popsize[population]) + popProfile[currentProfile].popStart[population];\r
+          }\r
+          else{\r
+                  int population=0; \r
+          \r
+                  for(int i=0;i<nPOP;i++){\r
+                          if(individual>=popProfile[currentProfile].popStart[i])population=i; \r
+                          else break;\r
+                  }\r
+//                cout<<"Change: ind="<<individual<<" pop="<<population<<endl;\r
+                  \r
+                  if (previous == -1 && nPOP>1){\r
+                     if (globalRandom.Next() < migration){\r
+                             int random = globalRandom.NextInt()%(nPOP-1); \r
+                         if(population<=random)population=random+1;\r
+                         else population=random;\r
+                     }\r
+                  }\r
+                  \r
+//                cout<<"new pop="<<population<<endl;\r
+                  \r
+                  if(popProfile[currentProfile].popChange[population].size()==1){\r
+//                        cout<<"To next pop="<<popProfile[currentProfile].popChange[population][0]<<endl;\r
+                          \r
+                               return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][0]]) \r
+                                               + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][0]];      \r
+                  }\r
+                  else{\r
+                               double selectRandom = globalRandom.Next();\r
+                               for(unsigned int i=0;i<popProfile[currentProfile].popChange[population].size();i++){\r
+                                       if(selectRandom < popProfile[currentProfile].selectProb[population][i]){\r
+                                               \r
+//                                             cout<<"To next pop="<<popProfile[currentProfile].popChange[population][i]<<endl;\r
+                                               \r
+                                               return globalRandom.NextInt() % (popProfile[currentProfile+1].popsize[popProfile[currentProfile].popChange[population][i]]) \r
+                                               + popProfile[currentProfile+1].popStart[popProfile[currentProfile].popChange[population][i]];\r
+                                       }       \r
+                               }\r
+                               cout<<"Random number generator error!"<<endl;\r
+                               exit(1);\r
+                  }\r
+                  \r
+                  \r
+          }\r
+          \r
+   }\r
+}\r
+\r
+void readPopProfile(string filename, unsigned int numPopulation){\r
+          \r
+          int popProfile_index=0;\r
+          \r
+          popProfile.clear();\r
+          struct popProfilesStruct temp_popProfile;\r
+          popProfile.push_back(temp_popProfile);       \r
+                                  \r
+          ifstream popFile;\r
+          string fileline;\r
+          char *term;\r
+          char *term1;\r
+          \r
+          popFile.open(filename.c_str());\r
+          if(!popFile)error("Population profile: %s cannot be opened!",filename.c_str());\r
+          \r
+          getline(popFile,fileline);                                   // the first line is the population sizes at generation 0\r
+          \r
+          term = strtok ((char*)fileline.c_str()," \t");  // the first term is the generation\r
+          popProfile[popProfile_index].generation=atoi(term);\r
+          \r
+          if(popProfile[popProfile_index].generation!=0)error("The first line in population profile should be generation 0!");\r
+          \r
+          term = strtok (NULL, " \t");                                    // the second term and after are the sizes of that population\r
+          while(term!=NULL){\r
+                          popProfile[popProfile_index].popsize.push_back(atoi(term));\r
+                          term = strtok (NULL, " \t");\r
+          }\r
+\r
+          popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
+          for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
+                  popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
+          }\r
+                          \r
+          maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
+          \r
+          if(popProfile[0].popsize.size()!=numPopulation)error("The number of populations specified by -pop is not consistent with generation 0 in population profile:%s",filename.c_str());\r
+          \r
+          // read in the population correspondence to the next population profile\r
+          \r
+                  \r
+          while(popFile.peek()!=EOF){\r
+                  getline(popFile,fileline);                                           // the even number lines are the correspondence of populations of different generation\r
+                  popProfile[popProfile_index].popChange.resize(popProfile[popProfile_index].popsize.size());\r
+                  \r
+                  term = strtok ((char*)fileline.c_str(),"- \t");  // the FROM population\r
+                  while(term!=NULL){\r
+                          \r
+                          term1 = strtok (NULL, "- \t");                               // the TO population\r
+\r
+                          popProfile[popProfile_index].popChange[atoi(term)-1].push_back(atoi(term1)-1);\r
+                          \r
+                          term = strtok (NULL, "- \t");\r
+                  }\r
+                  \r
+                  popProfile_index++;\r
+                  popProfile.push_back(temp_popProfile);\r
+                  \r
+                  if(popFile.peek()==EOF)error("Error in population profile: the last line should specify the population sizes.");\r
+                  \r
+                  // read the next population profile\r
+                  \r
+                  getline(popFile,fileline);                                   // the population sizes\r
+   \r
+                  term = strtok ((char*)fileline.c_str()," \t");  // the first term is the generation\r
+                  popProfile[popProfile_index].generation=atoi(term);\r
+                  \r
+                  term = strtok (NULL, " \t");                                    // the second term and after are the sizes of that population\r
+                  while(term!=NULL){\r
+                                  popProfile[popProfile_index].popsize.push_back(atoi(term));\r
+                                  term = strtok (NULL, " \t");\r
+                  }\r
+                  \r
+                  popProfile[popProfile_index].popStart.push_back(0); // the first population starts from 0\r
+                  for(unsigned int i=0;i<popProfile[popProfile_index].popsize.size()-1;i++){\r
+                          popProfile[popProfile_index].popStart.push_back(popProfile[popProfile_index].popsize[i]+popProfile[popProfile_index].popStart.back());\r
+                  }\r
+                  \r
+                  if(maxN < popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back())\r
+                                       maxN = popProfile[popProfile_index].popStart.back()+popProfile[popProfile_index].popsize.back();\r
+          }    \r
+          \r
+          for(unsigned int i=0;i<popProfile.size();i++){\r
+                  popProfile[i].selectProb.resize(popProfile[i].popChange.size());\r
+                  for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
+                          float sum=0.0;\r
+                          for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)sum += popProfile[i+1].popsize[popProfile[i].popChange[j][k]];\r
+                          \r
+                          popProfile[i].selectProb[j].push_back(((float)popProfile[i+1].popsize[popProfile[i].popChange[j][0]])/sum);\r
+                          for(unsigned int k=1;k<popProfile[i].popChange[j].size();k++)\r
+                                       popProfile[i].selectProb[j].push_back(popProfile[i].selectProb[j].back()+popProfile[i+1].popsize[popProfile[i].popChange[j][k]]/sum);\r
+                  }\r
+          }\r
+          // check for consistence\r
+          \r
+          int min, max;\r
+          \r
+          for(unsigned int i=0;i<popProfile.size()-1;i++){\r
+                       min=99999;max=-1;\r
+                       for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
+                               if(popProfile[i].popChange[j].size()==0)error("One population does not have its parent population: profile file line %d, population %d\n",i+1,j+1);     \r
+                               for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++){\r
+                                       if(min>popProfile[i].popChange[j][k])min=popProfile[i].popChange[j][k];\r
+                                       if(max<popProfile[i].popChange[j][k])max=popProfile[i].popChange[j][k];\r
+                               }\r
+                       }   \r
+                       if(min!=0 || max!=(int)(popProfile[i+1].popsize.size()-1))\r
+                               error("The population changes does not consistent with the next population profile. line=%d, min=%d, max=%d, num POP in next line=%d\n",i+1,min+1,max+1,popProfile[i+1].popsize.size());\r
+          }\r
+       \r
+}\r
+\r
+\r
+void readRecombination(string filename, int pieces){\r
+\r
+          ifstream recombFile;\r
+          string fileline;\r
+          char *term;\r
+               \r
+          vector<float> recDistribution;\r
+          vector<float> recRate;\r
+          \r
+          float sum=0.0;\r
+          float random;\r
+          \r
+          recombFile.open(filename.c_str());\r
+          if(!recombFile)error("Recombination profile: %s cannot be opened!",filename.c_str());\r
+          \r
+          getline(recombFile,fileline);                // the first line is head of distribution table\r
+          term = strtok ((char*)fileline.c_str()," \t");\r
+          term = strtok (NULL, " \t"); \r
+          \r
+          if(strcmp(term,"Freq")==0 || strcmp(term,"freq")==0){\r
+                  while(recombFile.peek()!=EOF){\r
+                          getline(recombFile,fileline);\r
+                          \r
+                          term = strtok ((char*)fileline.c_str()," \t"); // the first number is the recombination rate\r
+                          recRate.push_back((float)(atof(term)));\r
+                          \r
+                          if(atof(term)<0 || atof(term)>1)error("Recombination rate should be between 0 and 1. input=%f",atof(term));\r
+                          \r
+                          term = strtok (NULL, " \t");                                   // the second number is the frequency of recombination rate\r
+                          recDistribution.push_back((float)(atof(term)));\r
+                          \r
+                          if(atof(term)<=0)error("Frequency should be positive. input=%f",atof(term));\r
+                          \r
+                          sum += (float)(atof(term));\r
+                  }\r
+                  \r
+                  if(recRate.size()!=recDistribution.size() || recRate.size()==0)error("Errors in the recombination profile: %s",filename.c_str());\r
+                  \r
+                  // normalize the frequency \r
+                  for(unsigned int i=0;i<recDistribution.size();i++) recDistribution[i] /= sum;\r
+                  \r
+                  // compute the cumulative distribution\r
+                  for(unsigned int i=1;i<recDistribution.size();i++) recDistribution[i] += recDistribution[i-1];\r
+                  \r
+                  recombVector = new float [pieces-1];\r
+                  \r
+                  for(int i=0;i<pieces-1;i++){\r
+                          random = (float)(globalRandom.Next());\r
+                          for(unsigned int j=0;j<recDistribution.size();j++){\r
+                                       if(random < recDistribution[j]){\r
+                                               recombVector[i]=recRate[j];\r
+                                               break;   \r
+                                       }\r
+                          }\r
+                  }\r
+                  \r
+                       //check the input\r
+                       cout<<"*** RECOMBINATION RATES PROFILE ***"<<endl;\r
+                       cout<<"Recombination_Rate\tCumulative_frequency"<<endl;\r
+                       for(unsigned int i=0;i<recRate.size();i++){\r
+                               cout<<recRate[i]<<"\t\t\t"<<recDistribution[i]<<endl;  \r
+                       }               \r
+                          \r
+          }\r
+          else if(strcmp(term,"Pos")==0 || strcmp(term,"pos")==0){\r
+                  float currentRec=0.0,nextRec=0.0;\r
+                  int nextPos=-1;\r
+                  \r
+                  if(recombFile.peek()!=EOF){\r
+                          getline(recombFile,fileline);\r
+                          term = strtok ((char*)fileline.c_str()," \t");\r
+                          currentRec=(float)(atof(term));\r
+                          \r
+                          if(recombFile.peek()!=EOF){\r
+                                  getline(recombFile,fileline);\r
+                                  term = strtok ((char*)fileline.c_str()," \t");\r
+                              nextRec=(float)(atof(term));\r
+                              term = strtok (NULL, " \t");     \r
+                              nextPos=atoi(term);\r
+                          }\r
+                          else{\r
+                                  nextPos=-1;\r
+                          }\r
+                          \r
+                  }\r
+                  else{\r
+                          error("Recombination file does not contain recombination rate information.\n");\r
+                  }\r
+                  \r
+                  recombVector = new float [pieces-1];\r
+                  \r
+                  for(int i=0;i<pieces-1;i++){\r
+                          if(i+1==nextPos){\r
+                                   currentRec=nextRec;\r
+                                   if(recombFile.peek()!=EOF){\r
+                                               getline(recombFile,fileline);\r
+                                               term = strtok ((char*)fileline.c_str()," \t");\r
+                                       nextRec=(float)(atof(term));\r
+                                       term = strtok (NULL, " \t");    \r
+                                       nextPos=atoi(term);\r
+                                       }\r
+                                       else{\r
+                                               nextPos=-1;\r
+                                       }\r
+                               }\r
+                               \r
+                               recombVector[i]=currentRec;\r
+                  }\r
+                  \r
+                  \r
+                  \r
+          }\r
+          else{\r
+                  error("The second column is not \"freq\" or \"pos\".\n");\r
+          }\r
+          \r
+          \r
+\r
+          \r
+          cout<<"The recombination rates between fragments are:"<<endl<<"\t";\r
+          for(int i=0;i<pieces-1;i++)cout<<"("<<i+1<<") "<<recombVector[i]<<" ";\r
+          cout<<"("<<pieces<<")"<<endl;\r
+          cout<<"*** END OF RECOMBINATION RATES PROFILE ***"<<endl;\r
+               \r
+}\r
+\r
+void drawtrees(){\r
+       \r
+       map<int,string > tree;\r
+       ostringstream  temp;\r
+       \r
+                          \r
+       for(int i=0;i<L;i++){\r
+               tree.clear();\r
+               \r
+               for(int j=0;j<n;j++){\r
+                       if(tree[genepool[j*L+i]]!=""){\r
+                               temp.str("");\r
+                               temp <<tree[genepool[j*L+i]]<<","<<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
+                               tree[genepool[j*L+i]]=  temp.str();\r
+                       }\r
+                       else{\r
+                               temp.str("");\r
+                               temp <<j+1<<":"<<genetimes[genepool[j*L+i]]-genetimes[j*L+i];\r
+                               tree[genepool[j*L+i]]=  temp.str();\r
+                       }\r
+               }\r
+               \r
+       \r
+               for(int j=n*L;j<nextFree;j++){\r
+                       if(tree[j]!=""){\r
+                               if(genepool[j]==0){\r
+                                       cout<<"The tree for fragment "<<i+1<<"= ("<<tree[j]<<");"<<endl;\r
+                               }\r
+                               else{                                                   \r
+                                       if(tree[genepool[j]]!=""){\r
+                                               temp.str("");\r
+                                               temp <<tree[genepool[j]]<<",("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
+                                               tree[genepool[j]]=      temp.str();\r
+                                       }\r
+                                       else{\r
+                                               temp.str("");\r
+                                               temp <<"("<<tree[j]<<"):"<<genetimes[genepool[j]]-genetimes[j];\r
+                                               tree[genepool[j]]=temp.str();\r
+                                       }\r
+                               }\r
+                       }\r
+               }\r
+       }\r
+       \r
+       tree.clear();\r
+}\r
+\r
+\r
+\r
+\r
+void genome(string popSize,                                            // effective population size of each population\r
+                 int nSubPOP,                                                  // number of populations\r
+                 vector<int> & nSubSample,                     // number of samples (haplotypes) draw from each populations\r
+                 int numPieces,                                                // number of fragments for each sample (chromosome)\r
+                 int pieceLen,                                                 // length in base pair of each fragment\r
+                 int numIndepRegion,                                   // number of independent regions (independent chromosome)\r
+                 int s,                                                                // fixed number of SNPs want to simulate, randomly place s SNPs on the genealogy, -1 means the number of mutation follows a Poisson distribution \r
+                 string rec,                                                   // recombination rate between consecutive fragments per generation\r
+                 double mut,                                                   // mutation rate per generation per base pair\r
+                 double mig,                                                   // migration rate per generation\r
+                 vector< vector<bool> > &return_chromosome, // the return chromosome by connecting all independent chromosome into one long chromosome\r
+                 long t,                                                               // random seed \r
+                 int drawtree,                                         // drawtree=1 output the genealogy trees for each fragment and chromosome; drawtree=0 do not output the trees   \r
+                 bool printparameters                                  // =1 print out all input parameters, =0 do not print input parameters\r
+                 )\r
+{   \r
+   if(atoi(popSize.c_str())>0){                // if the population size is fixed during evoluation\r
+          N = atoi(popSize.c_str())*nSubPOP;\r
+          maxN = N;\r
+          \r
+          fixedPOP = true;\r
+          \r
+          popProfile.clear();\r
+          struct popProfilesStruct temp_popProfile;\r
+          popProfile.push_back(temp_popProfile);\r
+          \r
+          popProfile[0].generation = 0;\r
+          \r
+          for(int i=0;i<nSubPOP;i++)popProfile[0].popsize.push_back(atoi(popSize.c_str()));\r
+          for(int i=0;i<nSubPOP;i++)popProfile[0].popStart.push_back(i*atoi(popSize.c_str()));\r
+          \r
+          cout<<"*** POPULATION PROFILE ***"<<endl;\r
+          cout<<"The size of each population is fixed to "<<atoi(popSize.c_str())<<endl;\r
+          cout<<"Sizes of populations = ";\r
+          for(unsigned int j=0;j<popProfile[0].popsize.size();j++)cout<<popProfile[0].popsize[j]<<" ";\r
+          cout<<endl;\r
+//        cout<<"POP Start index = ";\r
+//        for(int j=0;j<popProfile[0].popStart.size();j++)cout<<popProfile[0].popStart[j]<<" ";\r
+//        cout<<endl;\r
+          cout<<"*** END OF POPULATION PROFILE ***"<<endl<<endl;\r
+                       \r
+   }\r
+   else{                                                                       // popSize stores the filename of the population profile during evoluation\r
+          fixedPOP = false;      \r
+       \r
+          currentProfile = 0;\r
+          \r
+          readPopProfile(popSize,nSubPOP);\r
+          \r
+          N = popProfile[0].popStart.back()+popProfile[0].popsize.back();\r
+          \r
+          // print out the population profile information.\r
+          \r
+          cout<<"*** POPULATION PROFILE ***"<<endl;\r
+          cout<<"Maximum total size of populations at all generations: N = "<<maxN<<endl;\r
+          cout<<"Total size of populations at generation 0: N = "<<N<<endl<<endl;\r
+          \r
+          for(unsigned int i=0;i<popProfile.size();i++){\r
+                       cout<<"Generation = "<<popProfile[i].generation<<endl;\r
+                       cout<<"Sizes of populations= ";\r
+                       for(unsigned int j=0;j<popProfile[i].popsize.size();j++)cout<<popProfile[i].popsize[j]<<" ";\r
+                       cout<<endl;\r
+//                     cout<<"POP Start index = ";\r
+//                     for(int j=0;j<popProfile[i].popStart.size();j++)cout<<popProfile[i].popStart[j]<<" ";\r
+//                     cout<<endl;\r
+                       if(popProfile[i].popChange.size()>0){\r
+                               cout<<"Populations change backward in time:"<<endl;\r
+                               for(unsigned int j=0;j<popProfile[i].popChange.size();j++){\r
+                                       cout<<" From "<<j+1<<" to ";\r
+                                       for(unsigned int k=0;k<popProfile[i].popChange[j].size();k++)cout<<popProfile[i].popChange[j][k]+1<<" ";\r
+                                       cout<<endl;\r
+                               }\r
+                               \r
+//                             cout<<"Selection cumulative probability:"<<endl;   \r
+//                             for(int j=0;j<popProfile[i].selectProb.size();j++){\r
+//                                     cout<<" From "<<j+1<<" with prob = ";\r
+//                                     for(int k=0;k<popProfile[i].selectProb[j].size();k++)cout<<popProfile[i].selectProb[j][k]<<" ";\r
+//                                     cout<<endl;\r
+//                             }\r
+                       }\r
+                       if(i==popProfile.size()-1)cout<<"*** END OF POPULATION PROFILE ***"<<endl;\r
+                       cout<<endl;\r
+          }\r
+   }\r
+   \r
+   n=0;\r
+   nPOP=nSubPOP;\r
+   nSample.resize(nPOP);\r
+   for(int i=0;i<nPOP;i++){\r
+          if(popProfile[0].popsize[i]<nSubSample[i])error("Population size should be larger than sample size!");\r
+          n += nSubSample[i];\r
+          nSample[i]=nSubSample[i];\r
+   }\r
+   \r
+   L = numPieces;\r
+   length = pieceLen;\r
+   numChr = numIndepRegion;\r
+   SNP=s;\r
+   \r
+   if(atof(rec.c_str())>0){                            // if the recombination rate is fixed\r
+          recombination = atof(rec.c_str());\r
+   }\r
+   else{                                               // the distribution of recombination rate is described in the file (filename stored in rec)\r
+          recombination = -1;\r
+          readRecombination(rec,L);\r
+   }\r
+   \r
+   \r
+   \r
+   mutation = mut;\r
+   migration = mig;\r
+   \r
+   if(t<=0)t=(long)(time(0)); // if t >0 we set the seed to t\r
+   cout<<endl<<"random seed="<<t<<endl;\r
+   globalRandom.Reset(t);      \r
+   \r
+   if(printparameters){\r
+          printf("popSize=%s, n=%d, nPOP=%d, L=%d, length=%d, numChr=%d, SNP=%d, rec=%s, mut=%.4e, mig=%.4e, drawtree=%d, printparameters=%d\n",popSize.c_str(),n,nPOP,L,length,numChr,SNP,rec.c_str(),mut,mig,drawtree,printparameters);\r
+          cout<<"subSample = ";\r
+          for(int i=0;i<nPOP;i++){\r
+                  cout<<nSample[i]<<" ";\r
+          }\r
+          cout<<endl;\r
+   }\r
+   \r
+   if(N<=0 || n<=0 || L<=0 || length <=0 || numChr <=0 || atof(rec.c_str()) <0 || mut <0 || mig<0){\r
+          cout<<"Input parameters have to be positive."<<endl;\r
+          printf("N=%d, n=%d, L=%d, length=%d, numChr=%d, recombination=%s, mutation=%lf, migration=%lf\n",N,n,L,length,numChr,rec.c_str(),mut,mig);\r
+          exit(0);\r
+   }\r
+\r
+   poolsize = 2 * n * L - L;\r
+\r
+   AllocateMemory(return_chromosome);\r
+\r
+//    cout<<"memory OK!"<<endl<<endl;\r
+   \r
+       for(int chr=0;chr<numChr;chr++){\r
+               \r
+          Initialize();\r
+          \r
+//        cout<<"Initialization OK!"<<endl;\r
+       \r
+          bool done = false;\r
+          int  generation = 0, units = n * L;\r
+          \r
+          while (!done)\r
+             {\r
+       //       printf("\n\nGeneration %d -- Tracking %d units of sequence [pool used %.2f%%]   %c",\r
+       //              generation++, units, nextFree * 100. / poolsize,\r
+       //              !debug ? '\r' : '\n');\r
+       \r
+                 generation++;\r
+                 \r
+                 if(!fixedPOP && currentProfile<popProfile.size()-1)\r
+                               if(generation == popProfile[currentProfile+1].generation)noChange=false;\r
+                 \r
+             //if (debug)\r
+             //   {\r
+             //   printf("F  : ");\r
+             //   for (int i = 0; i < L; i++)\r
+             //      printf("%3d ", fragments[i]);\r
+             //   printf("\n");\r
+             //   }\r
+       \r
+             // Position of the first segment allocated to the current generation\r
+             int generationStart = nextFree;\r
+       \r
+             // For each individual in the population\r
+             for (int i = 0; i < N && !done; i++)\r
+                if (child_flags[i])\r
+                   {\r
+                           \r
+                   /*if (debug) printf("%2d : ", i);*/\r
+       \r
+                   // The position we are currently tracking and its distance\r
+                   // from the previous position\r
+                   int distance = 0, parent = -1;\r
+                   int startPiece = 0;\r
+                   float recombProb;\r
+           \r
+                           for (int block = child_First[i]; block <= child_Last[i]; block++)\r
+                if (children[i][block] == NULL)\r
+                   distance += BLOCKSIZE;\r
+                else\r
+                   {\r
+                   int pos = BLOCKSIZE * block;\r
+                   int pos_in_block;\r
+                                  int pos_bound = (L < BLOCKSIZE * (block + 1) ? L : BLOCKSIZE * (block + 1) );\r
+\r
+                   while (pos <  pos_bound )\r
+                       {\r
+                              \r
+                          pos_in_block = pos % BLOCKSIZE;\r
+                       // Skip through uninformative positions along each chromosome\r
+                       if (children[i][block][pos_in_block] == -1)\r
+                          {\r
+                          //if (debug) printf("  . \n");\r
+                          distance++;\r
+                          pos++;\r
+                          continue;\r
+                          }\r
+                         \r
+                       // Pick an ancestor at random for the current piece of chromosome\r
+                       \r
+                       if(recombination>0){                    // if the recombination rate is fixed over the genome\r
+                              recombProb = (float)(pow(1.0 - recombination, distance));\r
+                       }\r
+                       else{                                           // if the recombination rate is specified by the profile\r
+                              recombProb = 1;\r
+                              for(int recV_iter=startPiece;recV_iter<pos;recV_iter++)recombProb *= (1-recombVector[recV_iter]);\r
+                       }\r
+                       \r
+                    \r
+                       if (parent == -1 ||\r
+                           distance > 0 && globalRandom.Next() > recombProb){\r
+                                   parent = SampleParent(i, parent);\r
+                        }\r
+                            \r
+                       \r
+                       TouchParent(parent, pos);\r
+                       \r
+                       \r
+                       // Reset distance between segments\r
+                       distance = 1;\r
+                       startPiece=pos;\r
+\r
+                       //if (debug) printf("parent=%3d pos=%d address=%d address2=%d\n", parent,pos,parents[parent][block],parents[parent][pos/BLOCKSIZE]);\r
+                       \r
+                       // Check if we sampled a new ancestral segment ...\r
+                       if (parents[parent][block][pos_in_block] == -1)\r
+                          {\r
+                          // We delay sampling a new gene from the pool until we know\r
+                          // a coalescent event occured ...\r
+                          parents[parent][block][pos_in_block] = children[i][block][pos_in_block];\r
+                          \r
+                          }\r
+                       // Or if a coalescent event occured\r
+                       else\r
+                          {\r
+                          // If we previously delayed sampling this parent\r
+                          if (parents[parent][block][pos_in_block] < generationStart)\r
+                            {\r
+                                   genetimes[nextFree] = generation;\r
+                                   genepool[parents[parent][block][pos_in_block]] = nextFree;\r
+                                   parents[parent][block][pos_in_block] = nextFree++;\r
+\r
+                                   if (nextFree == poolsize && units != 2)\r
+                                      error("Genepool exhausted\n");\r
+                            }\r
+\r
+                          genepool[children[i][block][pos_in_block]] = parents[parent][block][pos_in_block];\r
+                          \r
+                          fragments[pos]--;\r
+                          units--;\r
+\r
+                         if (fragments[pos] == 1)\r
+                            {\r
+                            // Reached the MRCA for this fragment\r
+                            MRCAtimes[pos] = generation;\r
+                            MRCA[pos] = parents[parent][block][pos_in_block];\r
+                            \r
+                            genepool[parents[parent][block][pos_in_block]]=0;\r
+                            \r
+                            parents[parent][block][pos_in_block] = -1;\r
+                            units--;\r
+         \r
+                            // One less segment to track\r
+                            if (--activeSegments == 0)\r
+                               {\r
+                               done = true;\r
+                               break;\r
+                               }\r
+                            }\r
+                         }\r
+                      pos++;\r
+                      }\r
+                   }\r
+                   }\r
+                   \r
+             NewGeneration();\r
+           \r
+             }\r
+       \r
+          if(drawtree){\r
+                  cout<<endl<<"Genealogy trees for chromosome:"<<chr+1<<endl;\r
+                  drawtrees();\r
+          }  \r
+             \r
+          // assign mutations\r
+\r
+          cout<<"Simulate mutation for Chromosome "<<chr+1<<endl;\r
+          \r
+          FreeCoalescentMemory();\r
+          \r
+          cout<<"Coalescent Memory free"<<endl;\r
+          \r
+          InitializeMutation(return_chromosome);\r
+\r
+          cout<<"Mutation process initialized"<<endl;\r
+               \r
+//         cout<<"genepool:\t";\r
+//             for(int i=0;i<2*n*L-L;i++)cout<<genepool[i]<<" ";\r
+//             cout<<endl;\r
+//             \r
+//             cout<<"genetimes:\t";\r
+//             for(int i=0;i<2*n*L-L;i++)cout<<genetimes[i]<<" ";\r
+//             cout<<endl;\r
+//        \r
+//             cout<<"nextFree="<<nextFree<<endl;\r
+               \r
+//             cout<<"mutation:\t";\r
+//             for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
+//             cout<<endl;\r
+               \r
+          if(SNP<=0){\r
+                  for(int i=0;i<nextFree;i++){\r
+                               if(genepool[i]!=0)branchMutation[i]=poissonInt((double)length*(double)(genetimes[genepool[i]]-genetimes[i])*mutation);   \r
+                  }\r
+          }\r
+          else{\r
+                  unsigned long * cumulativeCount = new unsigned long [nextFree];\r
+                  \r
+                  if(genepool[0]!=0)cumulativeCount[0] = genetimes[genepool[0]]-genetimes[0];\r
+                  else cumulativeCount[0] = 0;\r
+                  \r
+                  for(int i=1;i<nextFree;i++){\r
+                               if(genepool[i]!=0)cumulativeCount[i] = cumulativeCount[i-1] + genetimes[genepool[i]]-genetimes[i]; \r
+                               else cumulativeCount[i] = cumulativeCount[i-1];  \r
+                  }\r
+                  \r
+                  unsigned long totalGeneration = cumulativeCount[nextFree - 1];\r
+                  \r
+//                cout<<"Total Generation = "<<totalGeneration<<endl;\r
+//                \r
+//                if((unsigned long)SNP>totalGeneration){\r
+//                             cout<<"Required number of SNPs("<<SNP<<") is more than total number of generations("<<totalGeneration<<")!"<<endl;   \r
+//                }\r
+                  \r
+                  for(int i=0;i<SNP;i++){\r
+                          \r
+                          unsigned long randomLong = globalRandom.NextInt()%totalGeneration;\r
+                          \r
+                          int begin = 0;\r
+                          int end = nextFree - 1;\r
+                          int middle = (begin+end)/2;\r
+                          \r
+                          while(1){\r
+                                  if(cumulativeCount[middle]<randomLong){\r
+                                               begin = middle;\r
+                                  }\r
+                                  else{\r
+                                               end = middle;\r
+                                  }\r
+                                  \r
+                                  if(end == begin+1)break;\r
+                                  \r
+                                  middle = (begin+end)/2;\r
+                          }\r
+                          \r
+                          if(cumulativeCount[end] < randomLong || (cumulativeCount[begin] >= randomLong && begin != 0)){\r
+                                       cout<<"searching error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;   \r
+                                       error("see above message!\n");\r
+                          }\r
+                          \r
+                          int sampleIndex;\r
+                          \r
+                          if(cumulativeCount[begin]>=randomLong)sampleIndex = begin;\r
+                          else sampleIndex = end;\r
+                          \r
+                          if(genepool[sampleIndex]==0){\r
+                                  cout<<"genepool error: begin="<<begin<<" end="<<end<<" random="<<randomLong<<" c1="<<cumulativeCount[begin]<<" c2="<<cumulativeCount[end]<<endl;   \r
+                                  error("see above message!\n");\r
+                          }\r
+                          \r
+                          branchMutation[sampleIndex]++;\r
+                  }\r
+                  \r
+                  delete [] cumulativeCount;\r
+                  \r
+          }\r
+   \r
+          delete [] genetimes;\r
+          \r
+          cout<<"Mutation assigned"<<endl;\r
+           \r
+//     cout<<"mutation:\t";\r
+//             for(int i=0;i<nextFree;i++)cout<<branchMutation[i]<<" ";\r
+//             cout<<endl;\r
+\r
+          AllocateMutationMemory();\r
+          \r
+          cout<<"Mutation Memory allocated"<<endl;\r
+               \r
+          PrepareMutation();\r
+       \r
+          for(int i=0;i<n;i++){\r
+                       for(int j=0;j<L;j++){\r
+                               if(branchMutation[i*L+j]>0){\r
+                                       geneIndex[i*L+j] = fragmentLength[j];\r
+                                       fragmentLength[j] += branchMutation[i*L+j];\r
+                               }\r
+                               branchFragment[i*L+j] = j;\r
+                       }\r
+          }   \r
+             \r
+          \r
+          for(int i=0;i<nextFree;i++){\r
+                       if(genepool[genepool[i]]!=0 && genepool[i]!=0){\r
+                          if(branchMutation[genepool[i]]>0 && branchFragment[genepool[i]]== -1 ){\r
+                                  geneIndex[genepool[i]] = fragmentLength[branchFragment[i]];\r
+                                  fragmentLength[branchFragment[i]] += branchMutation[genepool[i]];\r
+                          }\r
+                          branchFragment[genepool[i]]=branchFragment[i];\r
+                       }\r
+          }\r
+          \r
+          //cout<<"branchfragment done"<<endl;\r
+          \r
+//     cout<<"branchFragment:\t";\r
+//             for(int i=0;i<nextFree;i++)cout<<branchFragment[i]<<" ";\r
+//             cout<<endl;\r
+\r
+//     cout<<"geneIndex:\t";\r
+//             for(int i=0;i<nextFree;i++)cout<<geneIndex[i]<<" ";\r
+//             cout<<endl;\r
+//     \r
+//             cout<<"fragmentLength:\t";\r
+//             for(int i=0;i<L;i++)cout<<fragmentLength[i]<<" ";\r
+//             cout<<endl;\r
+               \r
+\r
+//         cout<<"genepool genetime mutation frag index\n";\r
+//             for(int i=0;i<nextFree;i++)cout<<genepool[i]<<" "<<genetimes[i]<<" "<<branchMutation[i]<<" "<<branchFragment[i]<<" "<<geneIndex[i]<<endl;\r
+//             cout<<endl;\r
+          \r
+\r
+   \r
+       delete [] branchFragment;\r
+       \r
+          cout<<"Internal memory free"<<endl;\r
+          \r
+          // print the fragment index for each SNP\r
+          cout<<"SNP genetic position scaled to [0,1]:"<<endl;\r
+          if(L>1){\r
+                  for(int i=0;i<L;i++){\r
+                               for(int j=0;j<fragmentLength[i];j++)cout<<(float)i/(float)(L-1)<<" ";   \r
+                  }\r
+          }\r
+          else{\r
+                  cout<<"Only one fragment to be simulated. No recombination between SNPs.";\r
+          }\r
+          cout<<endl;\r
+                  \r
+          int totalLength=0;\r
+          if(SNP <=0){ \r
+                  for(int i=0;i<L;i++)totalLength += fragmentLength[i];\r
+          }\r
+          \r
+          \r
+          for(int i=0;i<n;i++){\r
+                       for(int j=0;j<L;j++){\r
+                               chromosome[i][j].resize(fragmentLength[j],0);\r
+\r
+                               for(int k=0;k<branchMutation[i*L+j];k++)chromosome[i][j][geneIndex[i*L+j]+k] = 1;\r
+                               \r
+                               int current=i*L+j;\r
+               \r
+                               while(genepool[genepool[current]]!=0){\r
+                                       for(int k=0;k<branchMutation[genepool[current]];k++)chromosome[i][j][geneIndex[genepool[current]]+k]=1;\r
+                                       current=genepool[current];\r
+                               }\r
+                       }\r
+          }\r
+          \r
+          cout<<"Chromosome done"<<endl;\r
+          \r
+          FreeMutationMemory();\r
+          \r
+          if(SNP <=0){ \r
+                  for(int i=0;i<n;i++){\r
+                               return_chromosome[i].reserve(return_chromosome[i].size()+totalLength);\r
+                  }\r
+          }\r
+          \r
+          for(int i=0;i<n;i++){\r
+                       for(int j=0;j<L;j++){\r
+                               return_chromosome[i].insert(return_chromosome[i].end(),chromosome[i][j].begin(),chromosome[i][j].end());\r
+                       }\r
+          }     \r
+          \r
+          cout<<"Return chromosome done"<<endl;\r
+          \r
+          // print the chromosomes\r
+//        for(int i=0;i<n;i++){\r
+//                     printf("Chr %d: ",i);\r
+//                     for(int j=0;j<L;j++){\r
+//                             for(int k=0;k<chromosome[i][j].size();k++)cout<<chromosome[i][j][k];\r
+//                             cout<<" ";      \r
+//                     }\r
+//                     cout<<endl;\r
+//        }       \r
+//        \r
+//        \r
+//       for(int i=0;i<n;i++){\r
+//                     printf("return Chr %d: ",i);\r
+//                     for(int j=0;j<return_chromosome[i].size();j++){\r
+//                             cout<<return_chromosome[i][j];\r
+//                     }\r
+//                     cout<<endl;\r
+//        }\r
+\r
+          \r
+   \r
+   }\r
+   \r
+   FreeMemory();\r
+   \r
+   printf("\nDone simulating ARG ...\n");\r
+   \r
+   \r
+   \r
+}\r
+\r
+\r
diff --git a/genome.h b/genome.h
new file mode 100644 (file)
index 0000000..ec8f113
--- /dev/null
+++ b/genome.h
@@ -0,0 +1,62 @@
+////////////////////////////////////////////////////////////////////// \r
+// genome.h \r
+//////////////////////////////////////////////////////////////////////////////\r
+//              COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+//   Redistribution and use in source and binary forms, with or without\r
+//   modification, are permitted provided that the following conditions\r
+//   are met:\r
+//\r
+//     1. Redistributions of source code must retain the above copyright\r
+//        notice, this list of conditions and the following disclaimer.\r
+//\r
+//     2. Redistributions in binary form must reproduce the above copyright\r
+//        notice, this list of conditions and the following disclaimer in the\r
+//        documentation and/or other materials provided with the distribution.\r
+//\r
+//     3. The names of its contributors may not be used to endorse or promote\r
+//        products derived from this software without specific prior written\r
+//        permission.\r
+//\r
+//   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+//   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+//   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+//   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+//   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+//   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+//   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+//   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+//   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+//   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+//   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+#include <vector>\r
+#include <string>\r
+\r
+using namespace std;\r
+\r
+void genome(string popSize,                                            // effective population size of each population\r
+                 int nSubPOP,                                                  // number of populations\r
+                 vector<int> & nSubSample,                     // number of samples (haplotypes) draw from each populations\r
+                 int numPieces,                                                // number of fragments for each sample (chromosome)\r
+                 int pieceLen,                                                 // length in base pair of each fragment\r
+                 int numIndepRegion,                                   // number of independent regions (independent chromosome)\r
+                 int s,                                                                // fixed number of SNPs want to simulate, randomly place s SNPs on the genealogy \r
+                 string rec,                                                   // recombination rate between consecutive fragments per generation\r
+                 double mut,                                                   // mutation rate per generation per base pair\r
+                 double mig,                                                   // migration rate per generation\r
+                 vector< vector<bool> > &return_chromosome, // the return chromosome by connecting all independent chromosome into one long chromosome\r
+                 long t,                                                               // random seed \r
+                 int drawtree,                                         // drawtree=1 output the genealogy trees for each fragment and chromosome; drawtree=0 do not output the trees   \r
+                 bool printparameters =false                   // =1 print out all input parameters, =0 do not print input parameters\r
+                 );\r
+\r
+\r
+\r
\r
diff --git a/makefile b/makefile
new file mode 100644 (file)
index 0000000..bfe2dd4
--- /dev/null
+++ b/makefile
@@ -0,0 +1,27 @@
+# module name
+NAME = genome
+
+# switches
+SW = -O2 -Wall
+
+# libreries
+LIB = -lm
+
+# compiler
+CC = g++
+
+# main module 
+
+OBJ = Main.o genome.o  Random.o Error.o stochastic.o
+
+$(NAME): $(OBJ)
+       $(CC) -o $(NAME) $(OBJ) $(LIB) $(SW)
+
+.cpp.o : 
+       $(CC) $(SW) -o $@ -c $*.cpp 
+
+.SUFFIXES : .cpp .c .o $(SUFFIXES)
+
+clean : 
+       rm -rf *.o core*
+
diff --git a/makefile-32bitsys b/makefile-32bitsys
new file mode 100644 (file)
index 0000000..7626bae
--- /dev/null
@@ -0,0 +1,27 @@
+# module name
+NAME = genome-32bit
+
+# switches
+SW = -O2 -Wall -m32
+
+# libreries
+LIB = -lm
+
+# compiler
+CC = g++
+
+# main module 
+
+OBJ = Main.o genome.o  Random.o Error.o stochastic.o
+
+$(NAME): $(OBJ)
+       $(CC) -o $(NAME) $(OBJ) $(LIB) $(SW)
+
+.cpp.o : 
+       $(CC) $(SW) -o $@ -c $*.cpp 
+
+.SUFFIXES : .cpp .c .o $(SUFFIXES)
+
+clean : 
+       rm -rf *.o core*
+
diff --git a/population.txt b/population.txt
new file mode 100644 (file)
index 0000000..e162178
--- /dev/null
@@ -0,0 +1,7 @@
+0      500     500     500
+1-1 2-1 3-2
+10     700     300
+1-1 1-2 2-3
+20     500     200     300
+1-1 2-1 3-1
+30     500
\ No newline at end of file
diff --git a/readme.txt b/readme.txt
new file mode 100644 (file)
index 0000000..1b62610
--- /dev/null
@@ -0,0 +1,84 @@
+
+GENOME: coalescent-based whole genome simulator
+(c) 2006-2009 Liming Liang, Goncalo Abecasis
+
+
+
+DISCLAIMER
+==========
+
+This is version of GENOME is provided to you free of charge.  It comes with
+no guarantees. If you report bugs and provide helpful comments, the next
+version will be better.
+
+
+DISTRIBUTION
+============
+
+The latest version of GENOME is available at:
+
+   http://www.sph.umich.edu/csg/liang/genome
+
+If you use GENOME please register by e-mailing lianglim@umich.edu. 
+
+Code for GENOME is subject to specific
+license conditions -- please see file LICENSE.GENOME for additional
+information. 
+
+Code for the Mersenne Twister random number generator is subject to specific
+license conditions -- please see file LICENSE.twister for additional
+information.
+
+
+
+
+BRIEF INSTRUCTIONS
+==================
+
+A complete manual is available at:
+
+     http://www.sph.umich.edu/csg/liang/genome
+
+
+Parameters and default values are listed below:
+
+      -pop       number of subpopulations and size of subsamples [ 2 10 10 ] 
+      -N         effective size of each subpopulation or the filename for population profile [10000] 
+      -c         number of independent regions (chromosomes) to simulate [1] 
+      -pieces    number of fragments per independent region [100] 
+      -len       length in base of each fragment [10000] 
+      -s         fixed number of SNPs per independent region (chromosome), -1 = number of SNPs follows Poisson distribution [-1] 
+      -rec       recombination rate bewteen consecutive fragments or the filename for recombination rate distribution [0.0001] 
+      -mut       mutation rate per generation per base pair [1e-08] 
+      -mig       migration rate per generation per individual [0.00025] 
+      -seed      random seed, -1 = use time as the seed [-1] 
+      -tree      1=draw the genealogy trees, 0=do not output [0] 
+      -maf       Output SNPs with minor allele frequence greater than [0] 
+      -prop      To keep this proportion of SNPs with MAF < the value of -maf parameter [1] 
+
+
+
+An example command line is:
+
+     genome -pop 2 3 5 -N 100 -tree 1
+
+This command will simulate 3 and 5 sequences from two populations respectively.
+Each population is of size 100. Other parameters will take the default values listed in "[]".
+The genealogy trees in Newick format will be output.
+
+recombination.txt contains an example of the recombination rates profile.
+population.txt contains an example of the population history profile. 
+For detail of these two files please refer to the -N and -rec parameters in the manual.
+
+
+REFERENCE
+=========
+Liang L., Zollner S., Abecasis G.R. (2007) GENOME: a rapid coalescent-based whole genome simulator.
+Bioinformatics 23(12):1565-7
+
+
+
+Comments and suggestions are welcome!
+
+Liming Liang
+lianglim@umich.edu
diff --git a/recombination-pos.txt b/recombination-pos.txt
new file mode 100644 (file)
index 0000000..921a858
--- /dev/null
@@ -0,0 +1,6 @@
+Rec            Pos
+0.0001 1
+0.001          3
+0.5    5
+0.7    6
+0.9    7
diff --git a/recombination.txt b/recombination.txt
new file mode 100644 (file)
index 0000000..5fdc153
--- /dev/null
@@ -0,0 +1,3 @@
+Rec            Freq
+0.0001 9
+0.001          1
diff --git a/stochastic.cpp b/stochastic.cpp
new file mode 100644 (file)
index 0000000..61eea36
--- /dev/null
@@ -0,0 +1,115 @@
+////////////////////////////////////////////////////////////////////// \r
+// stochastic.cpp \r
+//////////////////////////////////////////////////////////////////////////////\r
+//              COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+//   Redistribution and use in source and binary forms, with or without\r
+//   modification, are permitted provided that the following conditions\r
+//   are met:\r
+//\r
+//     1. Redistributions of source code must retain the above copyright\r
+//        notice, this list of conditions and the following disclaimer.\r
+//\r
+//     2. Redistributions in binary form must reproduce the above copyright\r
+//        notice, this list of conditions and the following disclaimer in the\r
+//        documentation and/or other materials provided with the distribution.\r
+//\r
+//     3. The names of its contributors may not be used to endorse or promote\r
+//        products derived from this software without specific prior written\r
+//        permission.\r
+//\r
+//   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+//   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+//   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+//   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+//   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+//   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+//   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+//   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+//   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+//   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+//   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+\r
+#include "Random.h"\r
+#include <math.h>\r
+#include <iostream>\r
+\r
+using namespace std;\r
+\r
+#define Pi 3.1415926535897932\r
+\r
+double lngamma(double z)\r
+{\r
+       double result,sum;\r
+       static double c[7]={1.000000000190015, 76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};\r
+       \r
+       sum=c[0];\r
+       for(int i=1;i<=6;i++)sum += c[i]/(z+i);\r
+       \r
+       result = (z+0.5)*log(z+5.5)-(z+5.5)+log(2.5066282746310005*sum/z);\r
+       \r
+       return result;\r
+}\r
+\r
+\r
+\r
+int poissonInt(double lambda) \r
+{\r
+       static double mean=-1, waitTime,s,logmean,c;\r
+       double count, eventTime,ratio,y;\r
+       \r
+       static double maxInt=pow(2.0,(double)(8*sizeof(int)-1))-1;\r
+       \r
+       if(lambda <0){\r
+               cout<<"The mean of Poisson should be >=0, input="<<lambda<<endl;\r
+               exit(1);        \r
+       }\r
+       else if(lambda < 12){\r
+               if(lambda != mean){\r
+                       mean = lambda;\r
+                       waitTime = exp(-lambda);        \r
+               }\r
+               \r
+               count = 0;\r
+               eventTime = globalRandom.Next();\r
+               \r
+               while(eventTime > waitTime){\r
+                       count++;\r
+                       eventTime *= globalRandom.Next();       \r
+               }\r
+       }\r
+       else{\r
+               if(lambda != mean){\r
+                       mean = lambda;\r
+                       s=sqrt(2*lambda);\r
+                       logmean=log(lambda);\r
+                       c=lambda*log(lambda)-lngamma(lambda+1);\r
+               }\r
+               \r
+               do{\r
+                       do{\r
+                               y=tan(Pi*globalRandom.Next());\r
+                               count = floor(s*y+lambda);\r
+                       }while(count<0);\r
+                       \r
+                       ratio = 0.9*(1+y*y)*exp(count*logmean-lngamma(count+1)-c);\r
+                       \r
+               }while(globalRandom.Next()>ratio);\r
+               \r
+       }\r
+       \r
+       if(count>maxInt)count=maxInt;\r
+       \r
+       return (int)count;              \r
+}\r
+\r
+\r
+\r
+\r
diff --git a/stochastic.h b/stochastic.h
new file mode 100644 (file)
index 0000000..63be72c
--- /dev/null
@@ -0,0 +1,42 @@
+////////////////////////////////////////////////////////////////////// \r
+// stochastic.h \r
+//////////////////////////////////////////////////////////////////////////////\r
+//              COPYRIGHT NOTICE FOR GENOME CODE\r
+//\r
+// Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,\r
+// All rights reserved.\r
+//\r
+//   Redistribution and use in source and binary forms, with or without\r
+//   modification, are permitted provided that the following conditions\r
+//   are met:\r
+//\r
+//     1. Redistributions of source code must retain the above copyright\r
+//        notice, this list of conditions and the following disclaimer.\r
+//\r
+//     2. Redistributions in binary form must reproduce the above copyright\r
+//        notice, this list of conditions and the following disclaimer in the\r
+//        documentation and/or other materials provided with the distribution.\r
+//\r
+//     3. The names of its contributors may not be used to endorse or promote\r
+//        products derived from this software without specific prior written\r
+//        permission.\r
+//\r
+//   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
+//   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
+//   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
+//   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR\r
+//   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\r
+//   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+//   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
+//   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
+//   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
+//   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
+//   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+///////////////////////////////////////////////////////////////////////////////\r
+\r
+\r
+int poissonInt(double lambda);\r
+\r
+\r
\r