1 //////////////////////////////////////////////////////////////////////
\r
3 //////////////////////////////////////////////////////////////////////////////
\r
4 // COPYRIGHT NOTICE FOR GENOME CODE
\r
6 // Copyright (C) 2006 - 2009, Liming Liang and Goncalo Abecasis,
\r
7 // All rights reserved.
\r
9 // Redistribution and use in source and binary forms, with or without
\r
10 // modification, are permitted provided that the following conditions
\r
13 // 1. Redistributions of source code must retain the above copyright
\r
14 // notice, this list of conditions and the following disclaimer.
\r
16 // 2. Redistributions in binary form must reproduce the above copyright
\r
17 // notice, this list of conditions and the following disclaimer in the
\r
18 // documentation and/or other materials provided with the distribution.
\r
20 // 3. The names of its contributors may not be used to endorse or promote
\r
21 // products derived from this software without specific prior written
\r
24 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
\r
25 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
\r
26 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
\r
27 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
\r
28 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
\r
29 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
\r
30 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
\r
31 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
\r
32 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
\r
33 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
\r
34 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
36 ///////////////////////////////////////////////////////////////////////////////
\r
46 #include <stdlib.h>
\r
50 using namespace std;
\r
53 int main(int argc, char ** argv)
\r
55 string popSize("10000"), rec("0.0001");
\r
57 int numPieces=100, pieceLen=10000, numIndepRegion=1, SNP=-1;
\r
59 vector<int> nSubSample(nSubPOP);
\r
60 for(int i=0;i<nSubPOP;i++)nSubSample[i]=10;
\r
62 double mut=1e-8, mig=2.5e-4;
\r
63 double maf=0.0,prop=1.0;
\r
65 vector< vector<bool> > data;
\r
72 cout << endl << "GENOME-0.2: Whole Genome Coalescent Simulator. (2009.2) Liming Liang, Goncalo Abecasis" << endl << endl;
\r
73 cout << " Parameters and default values:" << endl;
\r
74 cout << " -pop " << "number of subpopulations and size of subsamples [ " <<nSubPOP<<" ";
\r
75 for(int i=0;i<nSubPOP;i++)cout<<nSubSample[i]<<" ";cout<<"]"<< endl;
\r
76 cout << " -N " << "effective size of each subpopulation or the filename for population profile [" <<popSize.c_str()<<"]"<< endl;
\r
77 cout << " -c " << "number of independent regions (chromosomes) to simulate [" <<numIndepRegion<<"]"<< endl;
\r
78 cout << " -pieces " << "number of fragments per independent region [" <<numPieces<<"]"<< endl;
\r
79 cout << " -len " << "length in base of each fragment [" <<pieceLen<<"]"<< endl;
\r
80 cout << " -s " << "fixed number of SNPs per independent region (chromosome), -1 = number of SNPs follows Poisson distribution [" <<SNP<<"]"<< endl;
\r
81 cout << " -rec " << "recombination rate bewteen consecutive fragments"<<" or the filename for recombination rate distribution ["<<rec.c_str()<<"]"<< endl;
\r
82 cout << " -mut " << "mutation rate per generation per base pair [" <<mut<<"]"<< endl;
\r
83 cout << " -mig " << "migration rate per generation per individual [" <<mig<<"]"<< endl;
\r
84 cout << " -seed " << "random seed, -1 = use time as the seed [" <<seed<<"]"<< endl;
\r
85 cout << " -tree " << "1=draw the genealogy trees, 0=do not output [" <<drawtree<<"]"<< endl;
\r
86 cout << " -maf " << "Output SNPs with minor allele frequency greater than [" <<maf<<"]"<< endl;
\r
87 cout << " -prop " << "To keep this proportion of SNPs with MAF < the value of -maf parameter [" <<prop<<"]"<< endl;
\r
93 for(int i=1;i<argc;i++){
\r
94 if(strcmp(argv[i],"-pop")==0){
\r
95 nSubPOP=atoi(argv[++i]); k++;
\r
97 nSubSample.resize(nSubPOP);
\r
98 for(int iter=0;iter<nSubPOP;iter++)nSubSample[iter]=atoi(argv[++i]);
\r
100 else if(strcmp(argv[i],"-N")==0){ popSize.assign(argv[++i]); k++;}
\r
101 else if(strcmp(argv[i],"-c")==0){ numIndepRegion=atoi(argv[++i]); k++;}
\r
102 else if(strcmp(argv[i],"-pieces")==0){ numPieces=atoi(argv[++i]); k++;}
\r
103 else if(strcmp(argv[i],"-len")==0){ pieceLen=atoi(argv[++i]); k++;}
\r
104 else if(strcmp(argv[i],"-s")==0){ SNP=atoi(argv[++i]); k++;}
\r
105 else if(strcmp(argv[i],"-rec")==0){ rec.assign(argv[++i]); k++;}
\r
106 else if(strcmp(argv[i],"-mut")==0){ mut=atof(argv[++i]); k++;}
\r
107 else if(strcmp(argv[i],"-mig")==0){ mig=atof(argv[++i]); k++;}
\r
108 else if(strcmp(argv[i],"-seed")==0){ seed=atol(argv[++i]); k++;}
\r
109 else if(strcmp(argv[i],"-tree")==0){ drawtree=atoi(argv[++i]); k++;}
\r
110 else if(strcmp(argv[i],"-maf")==0){ maf=atof(argv[++i]); k++;}
\r
111 else if(strcmp(argv[i],"-prop")==0){ prop=atof(argv[++i]); k++;}
\r
114 if(k!=(argc-1)/2 && k!=(argc-1-nSubPOP)/2){
\r
115 cout << "Parameters do not match! " << argc<< " " << k << endl;
\r
120 << "GENOME-0.2: Whole Genome Coalescent Simulator. (2009.2) Liming Liang, Goncalo Abecasis" << endl << endl;
\r
121 cout << " Parameters and effective values:" << endl;
\r
122 cout << " -pop " << "number of subpopulations and size of subsamples [ " <<nSubPOP<<" ";
\r
123 for(int i=0;i<nSubPOP;i++)cout<<nSubSample[i]<<" ";cout<<"]"<< endl;
\r
124 cout << " -N " << "effective size of each subpopulation or the filename for population profile [" <<popSize.c_str()<<"]"<< endl;
\r
125 cout << " -c " << "number of independent regions (chromosomes) to simulate [" <<numIndepRegion<<"]"<< endl;
\r
126 cout << " -pieces " << "number of fragments per independent region [" <<numPieces<<"]"<< endl;
\r
127 cout << " -len " << "length in base of each fragment [" <<pieceLen<<"]"<< endl;
\r
128 cout << " -s " << "fixed number of SNPs per independent region (chromosome), -1 = number of SNPs follows Poisson distribution [" <<SNP<<"]"<< endl;
\r
129 cout << " -rec " << "recombination rate bewteen consecutive fragments or the filename for recombination rate distribution ["<<rec.c_str()<<"]"<< endl;
\r
130 cout << " -mut " << "mutation rate per generation per base pair [" <<mut<<"]"<< endl;
\r
131 cout << " -mig " << "migration rate per generation per individual [" <<mig<<"]"<< endl;
\r
132 cout << " -seed " << "random seed, -1 = use time as the seed [" <<seed<<"]"<< endl;
\r
133 cout << " -tree " << "1=draw the genealogy trees, 0=do not output [" <<drawtree<<"]"<< endl;
\r
134 cout << " -maf " << "Output SNPs with minor allele frequency greater than [" <<maf<<"]"<< endl;
\r
135 cout << " -prop " << "To keep this proportion of SNPs with MAF < the value of -maf parameter [" <<prop<<"]"<< endl<<endl;
\r
137 if(atoi(popSize.c_str())>0 && atof(rec.c_str())>0){
\r
138 printf(" Scaled recombination rate = %.4e\n", 2 * atoi(popSize.c_str()) * (numPieces - 1) * atof(rec.c_str()));
\r
139 printf(" Scaled migration rate = %.4e\n", 2 * atoi(popSize.c_str()) * mig);
\r
140 printf(" Scaled mutation rate = %.4e\n\n", 2 * atoi(popSize.c_str()) * mut * numPieces * pieceLen);
\r
143 genome(popSize, nSubPOP, nSubSample, numPieces, pieceLen, numIndepRegion, SNP, rec, mut, mig, data, seed,drawtree);
\r
145 // output format for haploview
\r
147 // for(int i=0;i<data.size();i++){
\r
148 // printf("FAMILY1 MEMBER%d ",i/2+1);
\r
149 // for(int j=0;j<data[i].size();j++){
\r
150 // cout<<data[i][j]+1<<" ";
\r
156 // 0 = ancestral state, 1 = mutated state
\r
158 cout<<"Total number of SNPs = "<<data[0].size()<<endl;
\r
159 cout<<"Samples:"<<endl;
\r
161 if(maf>0.0 && prop < 1.0){ // filter out the SNPs with MAF < maf
\r
163 cout<<"Rare SNPs filtered according to parameters."<<endl;
\r
166 for(int k=0;k<nSubPOP;k++)n+=nSubSample[k];
\r
168 int m=(int)(data[0].size());
\r
169 vector<bool> keep(m,true);
\r
172 for(int i=0;i<m;i++){
\r
174 for(int j=0;j<n;j++){
\r
179 if( (p<maf || (1-p)<maf) && globalRandom.Next()>prop)keep[i]=0;
\r
184 for(int k=0;k<nSubPOP;k++){
\r
185 for(int i=0;i<nSubSample[k];i++){
\r
186 cout<<"POP"<<k+1<<": ";
\r
187 for(unsigned int j=0;j<data[index].size();j++){
\r
188 if(keep[j])cout<<data[index][j];
\r
198 for(int k=0;k<nSubPOP;k++){
\r
199 for(int i=0;i<nSubSample[k];i++){
\r
200 cout<<"POP"<<k+1<<": ";
\r
201 for(unsigned int j=0;j<data[index].size();j++){
\r
202 cout<<data[index][j];
\r