From 0e0b8e9b4d2ccd05f2e2df0c7f11da3ff120451c Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Wed, 19 Oct 2016 12:53:24 -0700 Subject: [PATCH] add methods section --- simmons_genome_diversity_oct_2016.Rnw | 136 +++++++++++++++++++++++--- 1 file changed, 122 insertions(+), 14 deletions(-) diff --git a/simmons_genome_diversity_oct_2016.Rnw b/simmons_genome_diversity_oct_2016.Rnw index 04814b4..da88aec 100644 --- a/simmons_genome_diversity_oct_2016.Rnw +++ b/simmons_genome_diversity_oct_2016.Rnw @@ -36,6 +36,12 @@ \usepackage{multirow} \usepackage{array} +\usepackage{minted} +\usepackage{tcolorbox} +\usepackage{etoolbox} +\BeforeBeginEnvironment{minted}{\begin{tcolorbox}}% +\AfterEndEnvironment{minted}{\end{tcolorbox}}% + \mode{ \usetheme{CambridgeUS} \usecolortheme{crane} @@ -45,11 +51,14 @@ \definecolor{ilblue}{HTML}{606EB2} \definecolor{ilorange}{HTML}{D45D00} \logo{\begin{tikzpicture}% Pale figure - {\node[opacity=0.6]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}% + {\node[opacity=0.1]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}% };}% \end{tikzpicture}} } +% remove navigation symbols +\setbeamertemplate{navigation symbols}{} + \title[Ancestry]{Simmons Genome Diversity} \author[Don Armstrong]{Don L. 
Armstrong} \institute[IGB]{Institute for Genomic Biology, Computing Genomes @@ -80,9 +89,9 @@ library("xtable") \begin{center} Code and slides are here: - \qrcode[padding]{http://dla2.us/p/genomdev2016} + \qrcode[padding]{http://dla2.us/p/genomdiv2016} - \url{http://dla2.us/p/genomdev2016} + \url{http://dla2.us/p/genomdiv2016} \end{center} } @@ -93,6 +102,72 @@ library("xtable") \end{center} } +\begin{frame}{Sampling} + \begin{itemize} + \item 142 populations from Africa, America, Oceania, South Asia, + East Asia, and West Eurasia (mostly indigenous) + \item 300 samples sequenced at 34--83 fold coverage by Illumina + \item Aligned using BWA-MEM + \item Genotyped using a special version of GATK and Fermikit + \item Data available in EBI ($n=279$, PRJEB9586) and dbGaP ($n=21$, ?) + \end{itemize} +\end{frame} + + +\begin{frame}[fragile]{Alignment Pipeline} + \begin{itemize} + \item Aligned to the “decoy” version of the human reference + (hs37d5); supposedly improves alignment in misassembled regions or + regions with CNVs? + \item PCR-free data, though they marked optical duplicates + using samblaster + \end{itemize} +\begin{minted}{bash} +./htscmd bamshuf -Oun128 in.bam tmp-pre \ +| ./htscmd bam2fq -as aln-se.fq.gz - \ +| ./trimadap \ +| ./bwa mem -pt8 hs37d5.fa - \ +| ./samblaster \ +| samtools view -uS - \ +| samtools sort -@4 -m512M - out-pre +\end{minted} +\end{frame} + +\begin{frame}[fragile]{Genotyping} + \begin{itemize} + \item Reference-bias; novel variants, GATK assumes reference is more + likely which may not be the case. Use prior of + $(0.4995,0.001,0.4995)$ instead of default + $(0.9985,0.001,0.0005)$. + \begin{itemize} + \item Unclear what the effect of this change is on the calling + \item Maybe worth thinking about? 
+ \end{itemize} + \item Also used Fermikit; apparently has comparable call rates to + GATK and Platypus + \end{itemize} +\begin{minted}{bash} + java -Xmx2g -jar GenomeAnalysisTK.jar \ + -T UnifiedGenotyper -I srt.aln.bam \ + -L CHR_ID -R hs37d5.fa -dcov 600 -glm SNP \ + -out_mode EMIT_ALL_SITES -stand_call_conf 5.0 \ + -stand_emit_conf 5.0 -inputPrior 0.0010 \ + -inputPrior 0.4995 -D dbsnp_138.b37.vcf \ + -o CHR_ID.vcf -A GCContent -A BaseCounts +\end{minted} +\end{frame} + +\begin{frame}{Fermikit vs Platypus vs GATK} + \begin{columns} + \column{0.5\textwidth} + \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/supplemental-014.png} + \column{0.5\textwidth} + FermiKit and Platypus call 3.17M more sites than GATK, but unclear + whether those are real sites or not; they go into this in much more + detail than I've digested yet. +\end{columns} +\end{frame} + \begin{frame}{Relatedness of Populations} \begin{columns} \column{0.7\textwidth} @@ -107,55 +182,88 @@ library("xtable") \end{frame} \begin{frame}{PCA and Relatedness} - \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig_ed4.png} + \begin{center} + \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig_ed4.png} + \end{center} + \begin{itemize} + \item Greatest variation seen in the African populations (orange) + \item Other populations are much more similar to each other in + general + \item HapMap likely under-measured variation in Africa + \end{itemize} \end{frame} \begin{frame} \begin{center} - \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1b.png} + \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1b.png} \end{center} + \begin{itemize} + \item Pygmy populations have lower X heterozygosity than other African populations + \item Seen even after removing the third of X which 
is subject to selection + \item Suggests that it's driven by demographic history, and the + reduced diversity is due to male-driven admixture (also in non-Africans) + \end{itemize} \end{frame} -\begin{frame} +\begin{frame}{Neanderthal Ancestry} \begin{center} - \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1c.png} + \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1c.png} \end{center} + \begin{itemize} + \item No populations studied have a higher Neanderthal ancestry than + East Asians + \end{itemize} \end{frame} -\begin{frame} +\begin{frame}{Denisovan Ancestry} \begin{center} - \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1d.png} + \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1d.png} \end{center} + \begin{itemize} + \item Oceanian groups have as much as 5\% Denisovan ancestry + \item Eurasian differences in ancestry; some South Asians may have + higher Denisovan ancestry than other Eurasians + \end{itemize} \end{frame} + - -\begin{frame} +\begin{frame}{Variation missed by HapMap} \begin{center} \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig_ed1.png} \end{center} + \begin{itemize} + \item HapMap is missing up to 8\% of the heterozygous sites in parts + of Africa + \end{itemize} \end{frame} -\begin{frame} +\begin{frame}{Cross-coalescence rate} \begin{center} \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2a.png} \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2b.png} \end{center} \end{frame} -\begin{frame} +\begin{frame}{Cross-coalescence rate} \begin{center} \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2c.png} 
\includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2d.png} \end{center} \end{frame} -\begin{frame} +\begin{frame}{Effective Population Size} \begin{center} \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2e.png} \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2f.png} \end{center} \end{frame} +\begin{frame}{Best-fitting admixture Graph} + \begin{center} + \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig3.png} + \end{center} +\end{frame} + \section*{References} -- 2.39.2