--- /dev/null
+auto
+figure
+*.aux
+*.bbl
+*.bcf
+*.blg
+*.fdb_latexmk
+*.fls
+*.log
+*.out
+simons_genome_diversity_oct_2016.pdf
+simons_genome_diversity_oct_2016.tex
+*.run.xml
+*.rip
+cache
+*.lof
+*.lot
+*.toc
+*.fff
+*.ttt
+references.bib
+*.nav
+*.snm
+_minted*
+*.pyg
+*.vrb
#!/usr/bin/make -f
# Builds the presentation PDF in two steps: knitr turns the .Rnw source into
# .tex, then latexmk (driving xelatex) turns the .tex into .pdf.
#
# `all` is the first target and therefore the default goal.  This change
# corrects the file-name spelling from "simmons" to "simons".
# NOTE(review): `all` is not declared .PHONY in the lines visible here.
-all: simmons_genome_diversity_oct_2016.pdf
+all: simons_genome_diversity_oct_2016.pdf
# Command used to start R.  `?=` only assigns when R is not already set, so it
# can be overridden from the environment or the make command line.
R ?= R
# Knit an .Rnw file into the .tex file with the same stem using knitr;
# `$<` is the .Rnw prerequisite.
# NOTE(review): the recipe line below shows no leading TAB in this view; GNU
# Make requires one -- confirm the whitespace was not lost in transit.
%.tex: %.Rnw
$(R) --encoding=utf-8 -e "library('knitr'); knit('$<')"
# Extra prerequisites for the presentation PDF.  This rule has no recipe of
# its own; the %.pdf pattern rule below supplies it.
# NOTE(review): genome_diversity_paper and figures look like directories (the
# document includes files from both).  As normal prerequisites their mtimes
# can trigger rebuilds -- confirm that is intended.
-simmons_genome_diversity_oct_2016.pdf: \
-simmons_genome_diversity_oct_2016.tex genome_diversity_paper figures
+simons_genome_diversity_oct_2016.pdf: \
+simons_genome_diversity_oct_2016.tex genome_diversity_paper figures
# Build a PDF from its .tex source with latexmk, using xelatex as the engine
# (%O and %S are latexmk's placeholders for its options and the source file).
# Every *.bib and *.tex file in the directory is also a prerequisite, and
# `$<` is the .tex file matching the target's stem.
# -shell-escape lets xelatex run external programs (presumably required by the
# minted package the document loads -- confirm).
%.pdf: %.tex $(wildcard *.bib) $(wildcard *.tex)
latexmk -f -pdf -pdflatex='xelatex -shell-escape -8bit -interaction=nonstopmode %O %S' -bibtex -use-make $<
+++ /dev/null
-\documentclass[ignorenonframetext]{beamer}
-\usepackage{fontspec}
-\setmainfont{FreeSerif}
-\setsansfont{FreeSans}
-\setmonofont{FreeMono}
-\usepackage{url}
-\usepackage{fancyhdr}
-\usepackage{graphicx}
-\usepackage[bf]{caption}
-\usepackage{rotating}
-\usepackage{wrapfig}
-\usepackage{fancybox}
-\usepackage{booktabs}
-% \usepackage{multirow}
-\usepackage{acronym}
-\usepackage{qrcode}
-\usepackage[backend=biber,natbib=true,hyperref=true,style=nature]{biblatex}
-\addbibresource{references.bib}
-% \usepackage[nomargin,inline,draft]{fixme}
-% \newcommand{\DLA}[1]{\textcolor{red}{\fxnote{DLA: #1}}}
-% \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
-\usepackage{texshade}
-\usepackage{tikz}
-\usepackage{nameref}
-\usepackage{zref-xr,zref-user}
-\renewcommand*{\bibfont}{\tiny}
-
-% The textpos package is necessary to position textblocks at arbitary
-% places on the page. Use showboxes option to show outlines of textboxes.
-% \usepackage[absolute]{textpos}
-\usepackage[absolute,overlay]{textpos}
-\usepackage{mathtools,cancel}
-
-\renewcommand{\CancelColor}{\color{red}} %change cancel color to red
-
-\usepackage{multirow}
-\usepackage{array}
-
-\usepackage{minted}
-\usepackage{tcolorbox}
-\usepackage{etoolbox}
-\BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
-\AfterEndEnvironment{minted}{\end{tcolorbox}}%
-
-\mode<presentation>{
- \usetheme{CambridgeUS}
- \usecolortheme{crane}
- % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
- \definecolor{ilboldblue}{HTML}{002058}
- \definecolor{ilboldorange}{HTML}{E87722}
- \definecolor{ilblue}{HTML}{606EB2}
- \definecolor{ilorange}{HTML}{D45D00}
- \logo{\begin{tikzpicture}% Pale figure
- {\node[opacity=0.1]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
- };}%
- \end{tikzpicture}}
-}
-
-% remove navigation symbols
-\setbeamertemplate{navigation symbols}{}
-
-\title[Ancestry]{Simmons Genome Diversity}
-\author[Don Armstrong]{Don L. Armstrong}
-\institute[IGB]{Institute for Genomic Biology, Computing Genomes
- for Reproductive Health, University of Illinois, Urbana-Champaign}
-
-\begin{document}
-
-<<load.libraries,echo=FALSE,results="hide",warning=FALSE,message=FALSE,error=FALSE,cache=FALSE>>=
-opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio")
-#opts_chunk$set(cache=TRUE, autodep=TRUE)
-options(device = function(file, width = 8, height = 7, ...) {
- cairo_pdf(tempfile(), width = width, height = height, ...)
-})
-options(digits=2)
-library("data.table")
-library("ggplot2")
-library("reshape2")
-library("grid")
-library("xtable")
-
-@
-
-\IfFileExists{figures/relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{figures/relevant_xkcd.png}
-
- \url{https://xkcd.com/1706/}}}
-
-\frame[plain]{\titlepage
- \begin{center}
- Code and slides are here:
-
- \qrcode[padding]{http://dla2.us/p/genomdiv2016}
-
- \url{http://dla2.us/p/genomdiv2016}
- \end{center}
-}
-
-
-\frame[plain]{
- \begin{center}
- \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/title.png}
- \end{center}
-}
-
-\begin{frame}{Sampling}
- \begin{itemize}
- \item 142 populations from Africa, America, Oceania, South Asia,
- East Asia, and West Eurasia (mostly indigenous)
- \item 300 samples sequenced at 34-83 fold coverage by Illumina
- \item Aligned using BWA-MEM
- \item Genotyped using special version of GATK and Fermikit
- \item Data available in EBI ($n=279$, PRJEB9586) and dbGAP ($n=21$, ?)
- \end{itemize}
-\end{frame}
-
-
-\begin{frame}[fragile]{Alignment Pipeline}
- \begin{itemize}
- \item Aligned to the “decoy” version of the human reference
- (hs37d5); supposedly improves alignment in misassembled regions or
- regions with CNVs?
- \item PCR-free data, though they marked optical duplicates marked
- using samblaster
- \end{itemize}
-\begin{minted}{bash}
-./htscmd bamshuf -Oun128 in.bam tmp-pre \
-| ./htscmd bam2fq -as aln-se.fq.gz - \
-| ./trimadap \
-| ./bwa mem -pt8 hs37d5.fa - \
-| ./samblaster \
-| samtools view -uS - \
-| samtools sort -@4 -m512M - out-pre
-\end{minted}
-\end{frame}
-
-\begin{frame}[fragile]{Genotyping}
- \begin{itemize}
- \item Reference-bias; novel variants, GATK assumes reference is more
- likely which may not be the case. Use prior of
- $(0.4995,0.001,0.4995)$ instead of default
- $(0.9985,0.001,0.0005)$.
- \begin{itemize}
- \item Unclear what the effect of this change is on the calling
- \item Maybe worth thinking about?
- \end{itemize}
- \item Also used Fermikit; apparently has comparable call rates to
- GATK and platypus
- \end{itemize}
-\begin{minted}{bash}
- java -Xmx2g -jar GenomeAnalysisTK.jar \
- -T UnifiedGenotyper -I srt.aln.bam \
- -L CHR_ID -R hs37d5.fa -dcov 600 -glm SNP \
- -out_mode EMIT_ALL_SITES -stand_call_conf 5.0 \
- -stand_emit_conf 5.0 -inputPrior 0.0010 \
- -inputPrior 0.4995 -D dbsnp_138.b37.vcf \
- -o CHR_ID.vcf -A GCContent -A BaseCounts
-\end{minted}
-\end{frame}
-
-\begin{frame}{Fermikit vs Platypus vs GATK}
- \begin{columns}
- \column{0.5\textwidth}
- \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/supplemental-014.png}
- \column{0.5\textwidth}
- FermiKit and Platypus call 3.17M more sites than GATK, but unclear
- whether those are real sites or not; they go into this in much more
- detail than I've digested yet.
-\end{columns}
-\end{frame}
-
-\begin{frame}{Relatedness of Populations}
- \begin{columns}
- \column{0.7\textwidth}
- \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_1.png}
- \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_2.png}
- \column{0.3\textwidth}
- \begin{itemize}
- \item Neighbor joining tree based on pairwise divergence per nucleotide
- \item Deepest splits are in African populations
- \end{itemize}
- \end{columns}
-\end{frame}
-
-\begin{frame}{PCA and Relatedness}
- \begin{center}
- \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig_ed4.png}
- \end{center}
- \begin{itemize}
- \item Greatest variation seen in the African populations (orange)
- \item Other populations are much more similar to eachother in
- general
- \item Hapmap likely under-measured variation in Africa
- \end{itemize}
-\end{frame}
-
-\begin{frame}
- \begin{center}
- \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1b.png}
- \end{center}
- \begin{itemize}
- \item Pygmy populations have lower X heterozygosity than other African populations
- \item Seen even after removing the third of X which is subject to selection
- \item Suggests that it's driven by demographic history, and the
- reduced diversity is due to male-driven admixture (also in non-Africans)
- \end{itemize}
-\end{frame}
-
-\begin{frame}{Neanderthal Ancestry}
- \begin{center}
- \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1c.png}
- \end{center}
- \begin{itemize}
- \item No populations studied have a higher Neanderthal ancestry than
- East Asians
- \end{itemize}
-\end{frame}
-
-\begin{frame}{Denisovan Ancestry}
- \begin{center}
- \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1d.png}
- \end{center}
- \begin{itemize}
- \item Oceanian groups have as much as 5\% Denisovan ancestry
- \item Eurasian differences in ancestry; some South Asians may have
- higher Denisovan than other Eurasians
- \end{itemize}
-\end{frame}
-g
-
-\begin{frame}{Variation missed by hapmap}
- \begin{center}
- \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig_ed1.png}
- \end{center}
- \begin{itemize}
- \item Hapmap is missing up to 8\% of the heterozygous sites in parts
- of Africa
- \end{itemize}
-\end{frame}
-
-\begin{frame}{Cross-coalescence rate}
- \begin{center}
- \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2a.png}
- \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2b.png}
- \end{center}
-\end{frame}
-
-\begin{frame}{Cross-coalescence rate}
- \begin{center}
- \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2c.png}
- \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2d.png}
- \end{center}
-\end{frame}
-
-\begin{frame}{Effective Population Size}
- \begin{center}
- \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2e.png}
- \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2f.png}
- \end{center}
-\end{frame}
-
-\begin{frame}{Best-fitting admixture Graph}
- \begin{center}
- \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig3.png}
- \end{center}
-\end{frame}
-
-
-\section*{References}
-
-\begin{frame}[plain]{References}
- \begin{center}
- \mbox{}\vspace{-\baselineskip}
- \printbibliography[heading=none]
- \end{center}
-\end{frame}
-
-\end{document}
--- /dev/null
+\documentclass[ignorenonframetext]{beamer}
+\usepackage{fontspec}
+\setmainfont{FreeSerif}
+\setsansfont{FreeSans}
+\setmonofont{FreeMono}
+\usepackage{url}
+\usepackage{fancyhdr}
+\usepackage{graphicx}
+\usepackage[bf]{caption}
+\usepackage{rotating}
+\usepackage{wrapfig}
+\usepackage{fancybox}
+\usepackage{booktabs}
+% \usepackage{multirow}
+\usepackage{acronym}
+\usepackage{qrcode}
+\usepackage[backend=biber,natbib=true,hyperref=true,style=nature]{biblatex}
+\addbibresource{references.bib}
+% \usepackage[nomargin,inline,draft]{fixme}
+% \newcommand{\DLA}[1]{\textcolor{red}{\fxnote{DLA: #1}}}
+% \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
+\usepackage{texshade}
+\usepackage{tikz}
+\usepackage{nameref}
+\usepackage{zref-xr,zref-user}
+\renewcommand*{\bibfont}{\tiny}
+
+% The textpos package is necessary to position textblocks at arbitrary
+% places on the page. Use showboxes option to show outlines of textboxes.
+% \usepackage[absolute]{textpos}
+\usepackage[absolute,overlay]{textpos}
+\usepackage{mathtools,cancel}
+
+\renewcommand{\CancelColor}{\color{red}} %change cancel color to red
+
+\usepackage{multirow}
+\usepackage{array}
+
+\usepackage{minted}
+\usepackage{tcolorbox}
+\usepackage{etoolbox}
+\BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
+\AfterEndEnvironment{minted}{\end{tcolorbox}}%
+
+\mode<presentation>{
+ \usetheme{CambridgeUS}
+ \usecolortheme{crane}
+ % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
+ \definecolor{ilboldblue}{HTML}{002058}
+ \definecolor{ilboldorange}{HTML}{E87722}
+ \definecolor{ilblue}{HTML}{606EB2}
+ \definecolor{ilorange}{HTML}{D45D00}
+ \logo{\begin{tikzpicture}% Pale figure
+ {\node[opacity=0.1]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
+ };}%
+ \end{tikzpicture}}
+}
+
+% remove navigation symbols
+\setbeamertemplate{navigation symbols}{}
+
+\title[Ancestry]{Simons Genome Diversity}
+\author[Don Armstrong]{Don L. Armstrong}
+\institute[IGB]{Institute for Genomic Biology, Computing Genomes
+ for Reproductive Health, University of Illinois, Urbana-Champaign}
+
+\begin{document}
+
+<<load.libraries,echo=FALSE,results="hide",warning=FALSE,message=FALSE,error=FALSE,cache=FALSE>>=
+opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio")  # chunk defaults: cairo_pdf device, figures included at \textwidth x 0.8\textheight with keepaspectratio
+#opts_chunk$set(cache=TRUE, autodep=TRUE)
+options(device = function(file, width = 8, height = 7, ...) {  # default graphics device: cairo_pdf, 8 x 7 unless overridden
+ cairo_pdf(tempfile(), width = width, height = height, ...)  # the `file` argument is ignored; output goes to a temporary file
+})
+options(digits=2)  # print numbers with 2 significant digits
+library("data.table")  # NOTE(review): no other chunk in this view uses the packages loaded below -- confirm they are still needed
+library("ggplot2")
+library("reshape2")
+library("grid")
+library("xtable")
+
+@
+
+\IfFileExists{figures/relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{figures/relevant_xkcd.png}
+
+ \url{https://xkcd.com/1706/}}}
+
+\frame[plain]{\titlepage
+ \begin{center}
+ Code and slides are here:
+
+ \qrcode[padding]{http://dla2.us/p/genomdiv2016}
+
+ \url{http://dla2.us/p/genomdiv2016}
+ \end{center}
+}
+
+
+\frame[plain]{
+ \begin{center}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/title.png}
+ \end{center}
+}
+
+\begin{frame}{Sampling}
+ \begin{itemize}
+ \item 142 populations from Africa, America, Oceania, South Asia,
+ East Asia, and West Eurasia (mostly indigenous)
+ \item 300 samples sequenced at 34-83 fold coverage by Illumina
+ \item Aligned using BWA-MEM
+ \item Genotyped using special version of GATK and Fermikit
+ \item Data available in EBI ($n=279$, PRJEB9586) and dbGaP ($n=21$, ?)
+ \end{itemize}
+\end{frame}
+
+
+\begin{frame}[fragile]{Alignment Pipeline}
+ \begin{itemize}
+ \item Aligned to the “decoy” version of the human reference
+ (hs37d5); supposedly improves alignment in misassembled regions or
+ regions with CNVs?
+ \item PCR-free data, though they marked optical duplicates
+ using samblaster
+ \end{itemize}
+\begin{minted}{bash}
+./htscmd bamshuf -Oun128 in.bam tmp-pre \
+| ./htscmd bam2fq -as aln-se.fq.gz - \
+| ./trimadap \
+| ./bwa mem -pt8 hs37d5.fa - \
+| ./samblaster \
+| samtools view -uS - \
+| samtools sort -@4 -m512M - out-pre
+\end{minted}
+\end{frame}
+
+\begin{frame}[fragile]{Genotyping}
+ \begin{itemize}
+ \item Reference-bias; novel variants, GATK assumes reference is more
+ likely which may not be the case. Use prior of
+ $(0.4995,0.001,0.4995)$ instead of default
+ $(0.9985,0.001,0.0005)$.
+ \begin{itemize}
+ \item Unclear what the effect of this change is on the calling
+ \item Maybe worth thinking about?
+ \end{itemize}
+ \item Also used Fermikit; apparently has comparable call rates to
+ GATK and platypus
+ \end{itemize}
+\begin{minted}{bash}
+ java -Xmx2g -jar GenomeAnalysisTK.jar \
+ -T UnifiedGenotyper -I srt.aln.bam \
+ -L CHR_ID -R hs37d5.fa -dcov 600 -glm SNP \
+ -out_mode EMIT_ALL_SITES -stand_call_conf 5.0 \
+ -stand_emit_conf 5.0 -inputPrior 0.0010 \
+ -inputPrior 0.4995 -D dbsnp_138.b37.vcf \
+ -o CHR_ID.vcf -A GCContent -A BaseCounts
+\end{minted}
+\end{frame}
+
+\begin{frame}{Fermikit vs Platypus vs GATK}
+ \begin{columns}
+ \column{0.5\textwidth}
+ \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/supplemental-014.png}
+ \column{0.5\textwidth}
+ FermiKit and Platypus call 3.17M more sites than GATK, but unclear
+ whether those are real sites or not; they go into this in much more
+ detail than I've digested yet.
+\end{columns}
+\end{frame}
+
+\begin{frame}{Relatedness of Populations}
+ \begin{columns}
+ \column{0.7\textwidth}
+ \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_1.png}
+ \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1a_2.png}
+ \column{0.3\textwidth}
+ \begin{itemize}
+ \item Neighbor joining tree based on pairwise divergence per nucleotide
+ \item Deepest splits are in African populations
+ \end{itemize}
+ \end{columns}
+\end{frame}
+
+\begin{frame}{PCA and Relatedness}
+ \begin{center}
+ \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig_ed4.png}
+ \end{center}
+ \begin{itemize}
+ \item Greatest variation seen in the African populations (orange)
+ \item Other populations are much more similar to each other in
+ general
+ \item Hapmap likely under-measured variation in Africa
+ \end{itemize}
+\end{frame}
+
+\begin{frame}
+ \begin{center}
+ \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1b.png}
+ \end{center}
+ \begin{itemize}
+ \item Pygmy populations have lower X heterozygosity than other African populations
+ \item Seen even after removing the third of X which is subject to selection
+ \item Suggests that it's driven by demographic history, and the
+ reduced diversity is due to male-driven admixture (also in non-Africans)
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Neanderthal Ancestry}
+ \begin{center}
+ \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1c.png}
+ \end{center}
+ \begin{itemize}
+ \item No populations studied have a higher Neanderthal ancestry than
+ East Asians
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Denisovan Ancestry}
+ \begin{center}
+ \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1d.png}
+ \end{center}
+ \begin{itemize}
+ \item Oceanian groups have as much as 5\% Denisovan ancestry
+ \item Eurasian differences in ancestry; some South Asians may have
+ higher Denisovan than other Eurasians
+ \end{itemize}
+\end{frame}
+
+
+\begin{frame}{Variation missed by hapmap}
+ \begin{center}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig_ed1.png}
+ \end{center}
+ \begin{itemize}
+ \item Hapmap is missing up to 8\% of the heterozygous sites in parts
+ of Africa
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Cross-coalescence rate}
+ \begin{center}
+ \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2a.png}
+ \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2b.png}
+ \end{center}
+\end{frame}
+
+\begin{frame}{Cross-coalescence rate}
+ \begin{center}
+ \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2c.png}
+ \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2d.png}
+ \end{center}
+\end{frame}
+
+\begin{frame}{Effective Population Size}
+ \begin{center}
+ \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2e.png}
+ \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2f.png}
+ \end{center}
+\end{frame}
+
+\begin{frame}{Best-fitting admixture Graph}
+ \begin{center}
+ \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig3.png}
+ \end{center}
+\end{frame}
+
+
+\section*{References}
+
+\begin{frame}[plain]{References}
+ \begin{center}
+ \mbox{}\vspace{-\baselineskip}
+ \printbibliography[heading=none]
+ \end{center}
+\end{frame}
+
+\end{document}