]> git.donarmstrong.com Git - presentations/genome_diversity_oct_2016.git/blob - simmons_genome_diversity_oct_2016.Rnw
add methods section
[presentations/genome_diversity_oct_2016.git] / simmons_genome_diversity_oct_2016.Rnw
1 \documentclass[ignorenonframetext]{beamer}
2 \usepackage{fontspec}
3 \setmainfont{FreeSerif}
4 \setsansfont{FreeSans}
5 \setmonofont{FreeMono}
6 \usepackage{url}
7 \usepackage{fancyhdr}
8 \usepackage{graphicx}
9 \usepackage[bf]{caption}
10 \usepackage{rotating}
11 \usepackage{wrapfig}
12 \usepackage{fancybox}
13 \usepackage{booktabs}
14 % \usepackage{multirow}
15 \usepackage{acronym}
16 \usepackage{qrcode}
17 \usepackage[backend=biber,natbib=true,hyperref=true,style=nature]{biblatex}
18 \addbibresource{references.bib}
19 % \usepackage[nomargin,inline,draft]{fixme}
20 % \newcommand{\DLA}[1]{\textcolor{red}{\fxnote{DLA: #1}}}
21 % \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
22 \usepackage{texshade}
23 \usepackage{tikz}
24 \usepackage{nameref}
25 \usepackage{zref-xr,zref-user}
26 \renewcommand*{\bibfont}{\tiny}
27
28 % The textpos package is necessary to position textblocks at arbitrary
29 % places on the page.  Use showboxes option to show outlines of textboxes.
30 % \usepackage[absolute]{textpos}
31 \usepackage[absolute,overlay]{textpos}
32 \usepackage{mathtools,cancel}
33
34 \renewcommand{\CancelColor}{\color{red}} %change cancel color to red
35
36 \usepackage{multirow}
37 \usepackage{array}
38
39 \usepackage{minted}
40 \usepackage{tcolorbox}
41 \usepackage{etoolbox}
42 \BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
43 \AfterEndEnvironment{minted}{\end{tcolorbox}}%
44
45 \mode<presentation>{ 
46   \usetheme{CambridgeUS}
47   \usecolortheme{crane}
48   % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
49   \definecolor{ilboldblue}{HTML}{002058}
50   \definecolor{ilboldorange}{HTML}{E87722}
51   \definecolor{ilblue}{HTML}{606EB2}
52   \definecolor{ilorange}{HTML}{D45D00}
53   \logo{\begin{tikzpicture}% Pale figure
54       {\node[opacity=0.1]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
55         };}%
56     \end{tikzpicture}}
57 }
58
59 % remove navigation symbols
60 \setbeamertemplate{navigation symbols}{}
61
62 \title[Ancestry]{Simmons Genome Diversity}
63 \author[Don Armstrong]{Don L. Armstrong}
64 \institute[IGB]{Institute for Genomic Biology, Computing Genomes 
65   for Reproductive Health, University of Illinois, Urbana-Champaign}
66
67 \begin{document}
68
69 <<load.libraries,echo=FALSE,results="hide",warning=FALSE,message=FALSE,error=FALSE,cache=FALSE>>=
70 knitr::opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio") # global figure defaults; knitr:: so this works even when knitr is not attached
71 #opts_chunk$set(cache=TRUE, autodep=TRUE)
72 options(device = function(file, width = 8, height = 7, ...) { # default graphics device: 8x7 cairo_pdf
73   cairo_pdf(tempfile(), width = width, height = height, ...) # `file` is ignored; output goes to a temp file
74 })
75 options(digits = 2) # print numbers with 2 significant digits
76 library("data.table")
77 library("ggplot2")
78 library("reshape2")
79 library("grid")
80 library("xtable")
81 @
82
83
84 \IfFileExists{figures/relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{figures/relevant_xkcd.png}
85     
86     \url{https://xkcd.com/1706/}}}
87
88 \frame[plain]{\titlepage
89   \begin{center}
90     Code and slides are here: 
91     
92     \qrcode[padding]{http://dla2.us/p/genomdiv2016}
93     
94     \url{http://dla2.us/p/genomdiv2016}
95    \end{center}
96 }
97  
98
99 \frame[plain]{
100    \begin{center}
101      \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/title.png}
102    \end{center}
103 }
104
105 \begin{frame}{Sampling}
106   \begin{itemize}
107   \item 142 populations from Africa, America, Oceania, South Asia,
108     East Asia, and West Eurasia (mostly indigenous)
109   \item 300 samples sequenced at 34-83 fold coverage by Illumina
110   \item Aligned using BWA-MEM
111   \item Genotyped using special version of GATK and Fermikit
112   \item Data available in EBI ($n=279$, PRJEB9586) and dbGaP ($n=21$, ?)
113   \end{itemize}
114 \end{frame}
115
116
117 \begin{frame}[fragile]{Alignment Pipeline}
118   \begin{itemize}
119   \item Aligned to the “decoy” version of the human reference
120     (hs37d5); supposedly improves alignment in misassembled regions or
121     regions with CNVs?
122   \item PCR-free data, though they marked optical duplicates
123     using samblaster
124   \end{itemize}
125 \begin{minted}{bash}
126 ./htscmd bamshuf -Oun128 in.bam tmp-pre \
127 | ./htscmd bam2fq -as aln-se.fq.gz - \
128 | ./trimadap \
129 | ./bwa mem -pt8 hs37d5.fa - \
130 | ./samblaster \
131 | samtools view -uS - \
132 | samtools sort -@4 -m512M - out-pre
133 \end{minted}
134 \end{frame}
135
136 \begin{frame}[fragile]{Genotyping}
137   \begin{itemize}
138   \item Reference bias: for novel variants, GATK assumes the reference
139     is more likely, which may not be the case. Use prior of
140     $(0.4995,0.001,0.4995)$ instead of default
141     $(0.9985,0.001,0.0005)$.
142     \begin{itemize}
143     \item Unclear what the effect of this change is on the calling
144     \item Maybe worth thinking about?
145     \end{itemize}
146   \item Also used Fermikit; apparently has comparable call rates to
147     GATK and platypus
148   \end{itemize}
149 \begin{minted}{bash}
150   java -Xmx2g -jar GenomeAnalysisTK.jar \
151   -T UnifiedGenotyper -I srt.aln.bam \
152   -L CHR_ID -R hs37d5.fa -dcov 600 -glm SNP \
153   -out_mode EMIT_ALL_SITES -stand_call_conf 5.0 \
154   -stand_emit_conf 5.0 -inputPrior 0.0010 \
155   -inputPrior 0.4995 -D dbsnp_138.b37.vcf \
156   -o CHR_ID.vcf -A GCContent -A BaseCounts
157 \end{minted}
158 \end{frame}
159
160 \begin{frame}{Fermikit vs Platypus vs GATK}
161   \begin{columns}
162     \column{0.5\textwidth}
163     \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/supplemental-014.png}
164     \column{0.5\textwidth}
165   FermiKit and Platypus call 3.17M more sites than GATK, but unclear
166   whether those are real sites or not; they go into this in much more
167   detail than I've digested yet.
168 \end{columns}
169 \end{frame}
170
171 \begin{frame}{Relatedness of Populations}
172   \begin{columns}
173     \column{0.7\textwidth}
174     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_1.png}
175     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_2.png}
176     \column{0.3\textwidth}
177     \begin{itemize}
178     \item Neighbor joining tree based on pairwise divergence per nucleotide
179     \item Deepest splits are in African populations
180     \end{itemize}
181   \end{columns}
182 \end{frame}
183
184 \begin{frame}{PCA and Relatedness}
185   \begin{center}
186     \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig_ed4.png}
187   \end{center}
188   \begin{itemize}
189   \item Greatest variation seen in the African populations (orange)
190   \item Other populations are much more similar to each other in
191     general
192   \item HapMap likely under-measured variation in Africa
193   \end{itemize}
194 \end{frame}
195
196 \begin{frame}
197   \begin{center}
198     \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1b.png}
199   \end{center}
200   \begin{itemize}
201   \item Pygmy populations have lower X heterozygosity than other African populations
202   \item Seen even after removing the third of X which is subject to selection
203   \item Suggests that it's driven by demographic history, and the
204     reduced diversity is due to male-driven admixture (also in non-Africans)
205   \end{itemize}
206 \end{frame}
207
208 \begin{frame}{Neanderthal Ancestry}
209   \begin{center}
210     \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1c.png}
211   \end{center}
212   \begin{itemize}
213   \item No populations studied have a higher Neanderthal ancestry than
214     East Asians
215   \end{itemize}
216 \end{frame}
217
218 \begin{frame}{Denisovan Ancestry}
219   \begin{center}
220     \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1d.png}
221   \end{center}
222   \begin{itemize}
223   \item Oceanian groups have as much as 5\% Denisovan ancestry
224   \item Eurasian differences in ancestry; some South Asians may have
225     higher Denisovan than other Eurasians
226   \end{itemize}
227 \end{frame}
228
229
230 \begin{frame}{Variation missed by HapMap}
231   \begin{center}
232     \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig_ed1.png}
233   \end{center}
234   \begin{itemize}
235   \item HapMap is missing up to 8\% of the heterozygous sites in parts
236     of Africa
237   \end{itemize}
238 \end{frame}
239
240 \begin{frame}{Cross-coalescence rate}
241   \begin{center}
242     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2a.png}
243     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2b.png}
244   \end{center}
245 \end{frame}
246
247 \begin{frame}{Cross-coalescence rate}
248   \begin{center}
249     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2c.png}
250     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2d.png}
251   \end{center}
252 \end{frame}
253
254 \begin{frame}{Effective Population Size}
255   \begin{center}
256     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2e.png}
257     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2f.png}
258   \end{center}
259 \end{frame}
260
261 \begin{frame}{Best-fitting Admixture Graph}
262   \begin{center}
263     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig3.png}
264   \end{center}
265 \end{frame}
266
267
268 \section*{References}
269
270 \begin{frame}[plain]{References}
271   \begin{center}
272     \mbox{}\vspace{-\baselineskip}
273     \printbibliography[heading=none]
274   \end{center}
275 \end{frame}
276
277 \end{document}