% Beamer slides (knitr .Rnw source): "Simons Genome Diversity Project".
% Text outside frames is ignored in presentation mode (ignorenonframetext).
\documentclass[ignorenonframetext]{beamer}

% --- Fonts (fontspec: requires XeLaTeX or LuaLaTeX) ---
\usepackage{fontspec}
\setmainfont{FreeSerif}
\setsansfont{FreeSans}
\setmonofont{FreeMono}

% --- General packages ---
\usepackage{url}
\usepackage{fancyhdr}
\usepackage{graphicx}
\usepackage[bf]{caption}
\usepackage{rotating}
\usepackage{wrapfig}
\usepackage{fancybox}
\usepackage{booktabs}
% \usepackage{multirow}
\usepackage{acronym}
\usepackage{qrcode}

% --- Bibliography ---
\usepackage[backend=biber,natbib=true,hyperref=true,style=nature]{biblatex}
\addbibresource{references.bib}
% \usepackage[nomargin,inline,draft]{fixme}
% \newcommand{\DLA}[1]{\textcolor{red}{\fxnote{DLA: #1}}}
% \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
\usepackage{texshade}
\usepackage{tikz}
\usepackage{nameref}
\usepackage{zref-xr,zref-user}
\renewcommand*{\bibfont}{\tiny}

% The textpos package is necessary to position textblocks at arbitrary
% places on the page. Use showboxes option to show outlines of textboxes.
% \usepackage[absolute]{textpos}
\usepackage[absolute,overlay]{textpos}
\usepackage{mathtools,cancel}
\renewcommand{\CancelColor}{\color{red}} %change cancel color to red
\usepackage{multirow}
\usepackage{array}

% --- Code listings: wrap every minted environment in a tcolorbox ---
\usepackage{minted}
\usepackage{tcolorbox}
\usepackage{etoolbox}
\BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
\AfterEndEnvironment{minted}{\end{tcolorbox}}%

% --- Theme, colours and logo (presentation mode only) ---
\mode<presentation>{
  \usetheme{CambridgeUS}
  \usecolortheme{crane}
  % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
  \definecolor{ilboldblue}{HTML}{002058}
  \definecolor{ilboldorange}{HTML}{E87722}
  \definecolor{ilblue}{HTML}{606EB2}
  \definecolor{ilorange}{HTML}{D45D00}
  % The logo is only drawn when the image file is present.
  \logo{\begin{tikzpicture}% Pale figure
      {\node[opacity=0.1]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
        };}%
    \end{tikzpicture}}
}

% remove navigation symbols
\setbeamertemplate{navigation symbols}{}

\title[Simons Genome Div.]{Simons Genome Diversity Project}
\author[Don Armstrong]{Don L. Armstrong}
\institute[IGB]{Institute for Genomic Biology, Computing Genomes for
  Reproductive Health, University of Illinois, Urbana-Champaign}

\begin{document}

% knitr setup chunk: figure device/size defaults and R libraries.
% NOTE(review): the chunk header is written as a bare <<>>=; if this chunk
% is meant to carry a label or options (echo, message, ...), add them here.
<<>>=
opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio")
#opts_chunk$set(cache=TRUE, autodep=TRUE)
options(device = function(file, width = 8, height = 7, ...) {
  cairo_pdf(tempfile(), width = width, height = height, ...)
})
options(digits=2)
library("data.table")
library("ggplot2")
library("reshape2")
library("grid")
library("xtable")
@

% Optional opening slide; the empty third argument is the "file missing" branch.
\IfFileExists{figures/relevant_xkcd.png}{\frame[plain]{\centering
    \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{figures/relevant_xkcd.png}
    \url{https://xkcd.com/1706/}}}{}

\frame[plain]{\titlepage
  \begin{center}
    Code and slides are here:
    \qrcode[padding]{http://dla2.us/p/genomdiv2016}
    \url{http://dla2.us/p/genomdiv2016}
  \end{center}
}

\frame[plain]{
  \begin{center}
    \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/title.png}
  \end{center}
}

\begin{frame}{Sampling}
  \begin{itemize}
  \item 142 populations from Africa, America, Oceania, South Asia,
    East Asia, and West Eurasia (mostly indigenous)
  \item 300 samples sequenced at 34--83 fold coverage by Illumina
  \item Aligned using BWA-MEM
  \item Genotyped using special version of GATK and FermiKit
  \item Data available in EBI ($n=279$, PRJEB9586) and dbGaP ($n=21$, ?)
  \end{itemize}
\end{frame}

% Fragile frames (verbatim content): \end{frame} must stay on its own line.
\begin{frame}[fragile]{Alignment Pipeline}
  \begin{itemize}
  \item Aligned to the “decoy” version of the human reference
    (hs37d5); supposedly improves alignment in misassembled regions or
    regions with CNVs?
  \item PCR-free data, though they marked optical duplicates using
    samblaster
  \end{itemize}
  \begin{minted}{bash}
./htscmd bamshuf -Oun128 in.bam tmp-pre \
  | ./htscmd bam2fq -as aln-se.fq.gz - \
  | ./trimadap \
  | ./bwa mem -pt8 hs37d5.fa - \
  | ./samblaster \
  | samtools view -uS - \
  | samtools sort -@4 -m512M - out-pre
  \end{minted}
\end{frame}

\begin{frame}[fragile]{Genotyping}
  \begin{itemize}
  \item Reference-bias; novel variants, GATK assumes reference is more
    likely which may not be the case. Use prior of
    $(0.4995,0.001,0.4995)$ instead of default
    $(0.9985,0.001,0.0005)$.
    \begin{itemize}
    \item Unclear what the effect of this change is on the calling
    \item Maybe worth thinking about?
    \end{itemize}
  \item Also used FermiKit; apparently has comparable call rates to
    GATK and Platypus
  \end{itemize}
  \begin{minted}{bash}
java -Xmx2g -jar GenomeAnalysisTK.jar \
  -T UnifiedGenotyper -I srt.aln.bam \
  -L CHR_ID -R hs37d5.fa -dcov 600 -glm SNP \
  -out_mode EMIT_ALL_SITES -stand_call_conf 5.0 \
  -stand_emit_conf 5.0 -inputPrior 0.0010 \
  -inputPrior 0.4995 -D dbsnp_138.b37.vcf \
  -o CHR_ID.vcf -A GCContent -A BaseCounts
  \end{minted}
\end{frame}

\begin{frame}{FermiKit vs Platypus vs GATK}
  \begin{columns}
    \column{0.5\textwidth}
    \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/supplemental-014.png}
    \column{0.5\textwidth}
    FermiKit and Platypus call 3.17M more sites than GATK, but unclear
    whether those are real sites or not; they go into this in much
    more detail than I've digested yet.
  \end{columns}
\end{frame}

\begin{frame}{Relatedness of Populations}
  \begin{columns}
    \column{0.7\textwidth}
    \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_1.png}
    \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_2.png}
    \column{0.3\textwidth}
    \begin{itemize}
    \item Neighbor joining tree based on pairwise divergence per
      nucleotide
    \item Deepest splits are in African populations
    \end{itemize}
  \end{columns}
\end{frame}

\begin{frame}{PCA and Relatedness}
  \begin{center}
    \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig_ed4.png}
  \end{center}
  \begin{itemize}
  \item Greatest variation seen in the African populations (orange)
  \item Other populations are much more similar to each other in
    general
  \item HapMap likely under-measured variation in Africa
  \end{itemize}
\end{frame}

\begin{frame}
  \begin{center}
    \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1b.png}
  \end{center}
  \begin{itemize}
  \item Pygmy populations have lower X heterozygosity than other
    African populations
  \item Seen even after removing the third of X which is subject to
    selection
  \item Suggests that it's driven by demographic history, and the
    reduced diversity is due to male-driven admixture (also in
    non-Africans)
  \end{itemize}
\end{frame}

\begin{frame}{Neanderthal Ancestry}
  \begin{center}
    \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1c.png}
  \end{center}
  \begin{itemize}
  \item No populations studied have a higher Neanderthal ancestry than
    East Asians
  \end{itemize}
\end{frame}

\begin{frame}{Denisovan Ancestry}
  \begin{center}
    \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1d.png}
  \end{center}
  \begin{itemize}
  \item Oceanian groups have as much as 5\% Denisovan ancestry
  \item Eurasian differences in ancestry; some South Asians may have
    higher Denisovan than other Eurasians
  \end{itemize}
\end{frame}

\begin{frame}{Variation missed by HapMap}
  \begin{center}
    \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig_ed1.png}
  \end{center}
  \begin{itemize}
  \item HapMap is missing up to 8\% of the heterozygous sites in parts
    of Africa
  \end{itemize}
\end{frame}

\begin{frame}{Selected African cross-coalescence rate/Population Size}
  \begin{center}
    \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2a.png}
    \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2d.png}
  \end{center}
  \begin{itemize}
  \item Separation began around 200 kya for present-day
    hunter-gatherers
  \item Shared ancestors as recently as 100 kya
  \end{itemize}
\end{frame}

\begin{frame}{Central African rainforest hunter-gatherer}
  \begin{center}
    \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2b.png}
    \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2e.png}
  \end{center}
  \begin{itemize}
  \item Within Africa separation begins around 100 kya
  \item Still intermixing between Biaka (CAR, DRC), Bantu, and Luhya
    (Western Kenya)
  \end{itemize}
\end{frame}

\begin{frame}{Ancient, non-African}
  \begin{center}
    \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2c.png}
    \includegraphics[width=0.5\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig2f.png}
  \end{center}
  \begin{itemize}
  \item Separation begins around 50 kya
  \end{itemize}
\end{frame}

\begin{frame}{Best-fitting Admixture Graph}
  \begin{center}
    \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig3.png}
  \end{center}
  \begin{itemize}
  \item Present day populations have negligible ancestry from an early
    dispersal of modern humans out of Africa
  \end{itemize}
\end{frame}

\begin{frame}{Conclusions}
  \begin{itemize}
  \item Dispersal of human populations -- little evidence of ancestry
    from an early dispersal of modern humans
  \item Possible acceleration in the rate of mutations among
    non-Africans
  \item No evidence for species-wide selective sweeps around
    $\approx 50$ kya (start of modern human behavior in archaeological
    record) [Did not see that all pairs of modern humans shared a
    common ancestor $< 100$ kya.]
  \item Great resource for additional variants; maybe with H3A?
  \item Useful for my work with ancestral trees
  \end{itemize}
\end{frame}

\section*{References}
\begin{frame}[plain]{References}
  \begin{center}
    % Empty box plus negative skip pulls the list up to the top of the frame.
    \mbox{}\vspace{-\baselineskip}
    \printbibliography[heading=none]
  \end{center}
\end{frame}

\end{document}