]> git.donarmstrong.com Git - presentations/genome_diversity_oct_2016.git/blob - simons_genome_diversity_oct_2016.Rnw
update the title
[presentations/genome_diversity_oct_2016.git] / simons_genome_diversity_oct_2016.Rnw
1 \documentclass[ignorenonframetext]{beamer}
2 \usepackage{fontspec}
3 \setmainfont{FreeSerif}
4 \setsansfont{FreeSans}
5 \setmonofont{FreeMono}
6 \usepackage{url}
7 \usepackage{fancyhdr}
8 \usepackage{graphicx}
9 \usepackage[bf]{caption}
10 \usepackage{rotating}
11 \usepackage{wrapfig}
12 \usepackage{fancybox}
13 \usepackage{booktabs}
14 % \usepackage{multirow}
15 \usepackage{acronym}
16 \usepackage{qrcode}
17 \usepackage[backend=biber,natbib=true,hyperref=true,style=nature]{biblatex}
18 \addbibresource{references.bib}
19 % \usepackage[nomargin,inline,draft]{fixme}
20 % \newcommand{\DLA}[1]{\textcolor{red}{\fxnote{DLA: #1}}}
21 % \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
22 \usepackage{texshade}
23 \usepackage{tikz}
24 \usepackage{nameref}
25 \usepackage{zref-xr,zref-user}
26 \renewcommand*{\bibfont}{\tiny}
27
28 % The textpos package is necessary to position textblocks at arbitrary
29 % places on the page.  Use showboxes option to show outlines of textboxes.
30 % \usepackage[absolute]{textpos}
31 \usepackage[absolute,overlay]{textpos}
32 \usepackage{mathtools,cancel}
33
34 \renewcommand{\CancelColor}{\color{red}} %change cancel color to red
35
36 \usepackage{multirow}
37 \usepackage{array}
38
39 \usepackage{minted}
40 \usepackage{tcolorbox}
41 \usepackage{etoolbox}
42 \BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
43 \AfterEndEnvironment{minted}{\end{tcolorbox}}%
44
45 \mode<presentation>{ 
46   \usetheme{CambridgeUS}
47   \usecolortheme{crane}
48   % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
49   \definecolor{ilboldblue}{HTML}{002058}
50   \definecolor{ilboldorange}{HTML}{E87722}
51   \definecolor{ilblue}{HTML}{606EB2}
52   \definecolor{ilorange}{HTML}{D45D00}
53   \logo{\begin{tikzpicture}% Pale figure
54       {\node[opacity=0.1]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
55         };}%
56     \end{tikzpicture}}
57 }
58
59 % remove navigation symbols
60 \setbeamertemplate{navigation symbols}{}
61
62 \title[Simons Genome Div.]{Simons Genome Diversity Project}
63 \author[Don Armstrong]{Don L. Armstrong}
64 \institute[IGB]{Institute for Genomic Biology, Computing Genomes 
65   for Reproductive Health, University of Illinois, Urbana-Champaign}
66
67 \begin{document}
68
69 <<load.libraries,echo=FALSE,results="hide",warning=FALSE,message=FALSE,error=FALSE,cache=FALSE>>=
70 opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio")
71 #opts_chunk$set(cache=TRUE, autodep=TRUE)
72 options(device = function(file, width = 8, height = 7, ...) {
73   cairo_pdf(tempfile(), width = width, height = height, ...)
74 })
75 options(digits=2)
76 library("data.table")
77 library("ggplot2")
78 library("reshape2")
79 library("grid")
80 library("xtable")
81 @
82
83
84 \IfFileExists{figures/relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{figures/relevant_xkcd.png}
85     
86     \url{https://xkcd.com/1706/}}}{}
87
88 \frame[plain]{\titlepage
89   \begin{center}
90     Code and slides are here: 
91     
92     \qrcode[padding]{http://dla2.us/p/genomdiv2016}
93     
94     \url{http://dla2.us/p/genomdiv2016}
95    \end{center}
96 }
97  
98
99 \frame[plain]{
100    \begin{center}
101      \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/title.png}
102    \end{center}
103 }
104
105 \begin{frame}{Sampling}
106   \begin{itemize}
107   \item 142 populations from Africa, America, Oceania, South Asia,
108     East Asia, and West Eurasia (mostly indigenous)
109   \item 300 samples sequenced at 34--83-fold coverage by Illumina
110   \item Aligned using BWA-MEM
111   \item Genotyped using special version of GATK and Fermikit
112   \item Data available in EBI ($n=279$, PRJEB9586) and dbGaP ($n=21$, ?)
113   \end{itemize}
114 \end{frame}
115
116
117 \begin{frame}[fragile]{Alignment Pipeline}
118   \begin{itemize}
119   \item Aligned to the “decoy” version of the human reference
120     (hs37d5); supposedly improves alignment in misassembled regions or
121     regions with CNVs?
122   \item PCR-free data, though they marked optical duplicates
123     using samblaster
124   \end{itemize}
125 \begin{minted}{bash}
126 ./htscmd bamshuf -Oun128 in.bam tmp-pre \
127 | ./htscmd bam2fq -as aln-se.fq.gz - \
128 | ./trimadap \
129 | ./bwa mem -pt8 hs37d5.fa - \
130 | ./samblaster \
131 | samtools view -uS - \
132 | samtools sort -@4 -m512M - out-pre
133 \end{minted}
134 \end{frame}
135
136 \begin{frame}[fragile]{Genotyping}
137   \begin{itemize}
138   \item Reference bias: for novel variants, GATK assumes the reference
139     is more likely, which may not be the case. Use prior of
140     $(0.4995,0.001,0.4995)$ instead of default
141     $(0.9985,0.001,0.0005)$.
142     \begin{itemize}
143     \item Unclear what the effect of this change is on the calling
144     \item Maybe worth thinking about?
145     \end{itemize}
146   \item Also used Fermikit; apparently has comparable call rates to
147     GATK and platypus
148   \end{itemize}
149 \begin{minted}{bash}
150   java -Xmx2g -jar GenomeAnalysisTK.jar \
151   -T UnifiedGenotyper -I srt.aln.bam \
152   -L CHR_ID -R hs37d5.fa -dcov 600 -glm SNP \
153   -out_mode EMIT_ALL_SITES -stand_call_conf 5.0 \
154   -stand_emit_conf 5.0 -inputPrior 0.0010 \
155   -inputPrior 0.4995 -D dbsnp_138.b37.vcf \
156   -o CHR_ID.vcf -A GCContent -A BaseCounts
157 \end{minted}
158 \end{frame}
159
160 \begin{frame}{Fermikit vs Platypus vs GATK}
161   \begin{columns}
162     \column{0.5\textwidth}
163     \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/supplemental-014.png}
164     \column{0.5\textwidth}
165   FermiKit and Platypus call 3.17M more sites than GATK, but unclear
166   whether those are real sites or not; they go into this in much more
167   detail than I've digested yet.
168 \end{columns}
169 \end{frame}
170
171 \begin{frame}{Relatedness of Populations}
172   \begin{columns}
173     \column{0.7\textwidth}
174     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_1.png}
175     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_2.png}
176     \column{0.3\textwidth}
177     \begin{itemize}
178     \item Neighbor joining tree based on pairwise divergence per nucleotide
179     \item Deepest splits are in African populations
180     \end{itemize}
181   \end{columns}
182 \end{frame}
183
184 \begin{frame}{PCA and Relatedness}
185   \begin{center}
186     \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig_ed4.png}
187   \end{center}
188   \begin{itemize}
189   \item Greatest variation seen in the African populations (orange)
190   \item Other populations are much more similar to each other in
191     general
192   \item Hapmap likely under-measured variation in Africa
193   \end{itemize}
194 \end{frame}
195
196 \begin{frame}
197   \begin{center}
198     \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1b.png}
199   \end{center}
200   \begin{itemize}
201   \item Pygmy populations have lower X heterozygosity than other African populations
202   \item Seen even after removing the third of X which is subject to selection
203   \item Suggests that it's driven by demographic history, and the
204     reduced diversity is due to male-driven admixture (also in non-Africans)
205   \end{itemize}
206 \end{frame}
207
208 \begin{frame}{Neanderthal Ancestry}
209   \begin{center}
210     \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1c.png}
211   \end{center}
212   \begin{itemize}
213   \item No populations studied have a higher Neanderthal ancestry than
214     East Asians
215   \end{itemize}
216 \end{frame}
217
218 \begin{frame}{Denisovan Ancestry}
219   \begin{center}
220     \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1d.png}
221   \end{center}
222   \begin{itemize}
223   \item Oceanian groups have as much as 5\% Denisovan ancestry
224   \item Eurasian differences in ancestry; some South Asians may have
225     higher Denisovan than other Eurasians
226   \end{itemize}
227 \end{frame}
228
229
230 \begin{frame}{Variation missed by hapmap}
231   \begin{center}
232     \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig_ed1.png}
233   \end{center}
234   \begin{itemize}
235   \item Hapmap is missing up to 8\% of the heterozygous sites in parts
236     of Africa
237   \end{itemize}
238 \end{frame}
239
240 \begin{frame}{Selected African cross-coalescence rate/Population Size}
241   \begin{center}
242     \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2a.png}
243     \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2d.png}
244   \end{center}
245   \begin{itemize}
246   \item Separation began around 200 kya for present-day hunter-gatherers
247   \item Shared ancestors as recently as 100 kya
248   \end{itemize}
249 \end{frame}
250
251 \begin{frame}{Central African rainforest hunter-gatherer}
252   \begin{center}
253     \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2b.png}
254     \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2e.png}
255   \end{center}
256   \begin{itemize}
257   \item Within Africa separation begins around 100 kya
258   \item Still intermixing between Biaka (CAR, DRC), Bantu, and Luhya (Western Kenya)
259   \end{itemize}
260 \end{frame}
261
262 \begin{frame}{Ancient, non-African}
263   \begin{center}
264     \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2c.png}
265     \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2f.png}
266   \end{center}
267   \begin{itemize}
268   \item Separation begins around 50 kya
269   \end{itemize}
270 \end{frame}
271
272 \begin{frame}{Best-fitting admixture Graph}
273   \begin{center}
274     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig3.png}
275   \end{center}
276   \begin{itemize}
277   \item Present day populations have negligible ancestry from an early
278     dispersal of modern humans out of Africa
279   \end{itemize}
280 \end{frame}
281
282
283 \begin{frame}{Conclusions}
284   \begin{itemize}
285   \item Dispersal of human populations -- little evidence of ancestry
286     from an early dispersal of modern humans
287   \item Possible acceleration in the rate of mutations among non-Africans
288   \item No evidence for species-wide selective sweeps around
289     $\approx 50$ kya (start of modern human behavior in archaeological
290     record) [Did not see that all pairs of modern humans shared a
291     common ancestor $< 100$ kya.]
292   \item Great resource for additional variants; maybe with H3A?
293   \item Useful for my work with ancestral trees
294   \end{itemize}
295 \end{frame}
296
297 \section*{References}
298
299 \begin{frame}[plain]{References}
300   \begin{center}
301     \mbox{}\vspace{-\baselineskip}
302     \printbibliography[heading=none]
303   \end{center}
304 \end{frame}
305
306 \end{document}