]> git.donarmstrong.com Git - presentations/genome_diversity_oct_2016.git/commitdiff
add methods section
authorDon Armstrong <don@donarmstrong.com>
Wed, 19 Oct 2016 19:53:24 +0000 (12:53 -0700)
committerDon Armstrong <don@donarmstrong.com>
Wed, 19 Oct 2016 19:53:24 +0000 (12:53 -0700)
simmons_genome_diversity_oct_2016.Rnw

index 04814b4d0f0bb29ea62aa9a93dda6fbbe9c9ed76..da88aec75ba931062528c5d0c257f4f5c80d4d6d 100644 (file)
 \usepackage{multirow}
 \usepackage{array}
 
+\usepackage{minted}
+\usepackage{tcolorbox}
+\usepackage{etoolbox}
+\BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
+\AfterEndEnvironment{minted}{\end{tcolorbox}}%
+
 \mode<presentation>{ 
   \usetheme{CambridgeUS}
   \usecolortheme{crane}
   \definecolor{ilblue}{HTML}{606EB2}
   \definecolor{ilorange}{HTML}{D45D00}
   \logo{\begin{tikzpicture}% Pale figure
-      {\node[opacity=0.6]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
+      {\node[opacity=0.1]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
         };}%
     \end{tikzpicture}}
 }
 
+% remove navigation symbols
+\setbeamertemplate{navigation symbols}{}
+
 \title[Ancestry]{Simmons Genome Diversity}
 \author[Don Armstrong]{Don L. Armstrong}
 \institute[IGB]{Institute for Genomic Biology, Computing Genomes 
@@ -80,9 +89,9 @@ library("xtable")
   \begin{center}
     Code and slides are here: 
     
-    \qrcode[padding]{http://dla2.us/p/genomdev2016}
+    \qrcode[padding]{http://dla2.us/p/genomdiv2016}
     
-    \url{http://dla2.us/p/genomdev2016}
+    \url{http://dla2.us/p/genomdiv2016}
    \end{center}
 }
  
@@ -93,6 +102,72 @@ library("xtable")
    \end{center}
 }
 
+\begin{frame}{Sampling}
+  \begin{itemize}
+  \item 142 populations from Africa, America, Oceania, South Asia,
+    East Asia, and West Eurasia (mostly indigenous)
+  \item 300 samples sequenced at 34--83 fold coverage by Illumina
+  \item Aligned using BWA-MEM
+  \item Genotyped using special version of GATK and Fermikit
+  \item Data available in EBI ($n=279$, PRJEB9586) and dbGaP ($n=21$, ?)
+  \end{itemize}
+\end{frame}
+
+
+\begin{frame}[fragile]{Alignment Pipeline}
+  \begin{itemize}
+  \item Aligned to the “decoy” version of the human reference
+    (hs37d5); supposedly improves alignment in misassembled regions or
+    regions with CNVs?
+  \item PCR-free data, though they marked optical duplicates
+    using samblaster
+  \end{itemize}
+\begin{minted}{bash}
+./htscmd bamshuf -Oun128 in.bam tmp-pre \
+| ./htscmd bam2fq -as aln-se.fq.gz - \
+| ./trimadap \
+| ./bwa mem -pt8 hs37d5.fa - \
+| ./samblaster \
+| samtools view -uS - \
+| samtools sort -@4 -m512M - out-pre
+\end{minted}
+\end{frame}
+
+\begin{frame}[fragile]{Genotyping}
+  \begin{itemize}
+  \item Reference-bias; novel variants, GATK assumes reference is more
+    likely which may not be the case. Use prior of
+    $(0.4995,0.001,0.4995)$ instead of default
+    $(0.9985,0.001,0.0005)$.
+    \begin{itemize}
+    \item Unclear what the effect of this change is on the calling
+    \item Maybe worth thinking about?
+    \end{itemize}
+  \item Also used Fermikit; apparently has comparable call rates to
+    GATK and platypus
+  \end{itemize}
+\begin{minted}{bash}
+  java -Xmx2g -jar GenomeAnalysisTK.jar \
+  -T UnifiedGenotyper -I srt.aln.bam \
+  -L CHR_ID -R hs37d5.fa -dcov 600 -glm SNP \
+  -out_mode EMIT_ALL_SITES -stand_call_conf 5.0 \
+  -stand_emit_conf 5.0 -inputPrior 0.0010 \
+  -inputPrior 0.4995 -D dbsnp_138.b37.vcf \
+  -o CHR_ID.vcf -A GCContent -A BaseCounts
+\end{minted}
+\end{frame}
+
+\begin{frame}{Fermikit vs Platypus vs GATK}
+  \begin{columns}
+    \column{0.5\textwidth}
+    \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/supplemental-014.png}
+    \column{0.5\textwidth}
+  FermiKit and Platypus call 3.17M more sites than GATK, but unclear
+  whether those are real sites or not; they go into this in much more
+  detail than I've digested yet.
+\end{columns}
+\end{frame}
+
 \begin{frame}{Relatedness of Populations}
   \begin{columns}
     \column{0.7\textwidth}
@@ -107,55 +182,88 @@ library("xtable")
 \end{frame}
 
 \begin{frame}{PCA and Relatedness}
-  \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig_ed4.png}
+  \begin{center}
+    \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/fig_ed4.png}
+  \end{center}
+  \begin{itemize}
+  \item Greatest variation seen in the African populations (orange)
+  \item Other populations are much more similar to each other in
+    general
+  \item Hapmap likely under-measured variation in Africa
+  \end{itemize}
 \end{frame}
 
 \begin{frame}
   \begin{center}
-    \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1b.png}
+    \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1b.png}
   \end{center}
+  \begin{itemize}
+  \item Pygmy populations have lower X heterozygosity than other African populations
+  \item Seen even after removing the third of X which is subject to selection
+  \item Suggests that it's driven by demographic history, and the
+    reduced diversity is due to male-driven admixture (also in non-Africans)
+  \end{itemize}
 \end{frame}
 
-\begin{frame}
+\begin{frame}{Neanderthal Ancestry}
   \begin{center}
-    \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1c.png}
+    \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1c.png}
   \end{center}
+  \begin{itemize}
+  \item No populations studied have a higher Neanderthal ancestry than
+    East Asians
+  \end{itemize}
 \end{frame}
 
-\begin{frame}
+\begin{frame}{Denisovan Ancestry}
   \begin{center}
-    \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig1d.png}
+    \includegraphics[width=\textwidth,height=0.6\textheight,keepaspectratio]{genome_diversity_paper/fig1d.png}
   \end{center}
+  \begin{itemize}
+  \item Oceanian groups have as much as 5\% Denisovan ancestry
+  \item Eurasian differences in ancestry; some South Asians may have
+    higher Denisovan than other Eurasians
+  \end{itemize}
 \end{frame}
+
 
-
-\begin{frame}
+\begin{frame}{Variation missed by hapmap}
   \begin{center}
     \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig_ed1.png}
   \end{center}
+  \begin{itemize}
+  \item Hapmap is missing up to 8\% of the heterozygous sites in parts
+    of Africa
+  \end{itemize}
 \end{frame}
 
-\begin{frame}
+\begin{frame}{Cross-coalescence rate}
   \begin{center}
     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2a.png}
     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2b.png}
   \end{center}
 \end{frame}
 
-\begin{frame}
+\begin{frame}{Cross-coalescence rate}
   \begin{center}
     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2c.png}
     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2d.png}
   \end{center}
 \end{frame}
 
-\begin{frame}
+\begin{frame}{Effective Population Size}
   \begin{center}
     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2e.png}
     \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig2f.png}
   \end{center}
 \end{frame}
 
+\begin{frame}{Best-fitting admixture Graph}
+  \begin{center}
+    \includegraphics[width=0.5\textwidth,height=0.8\textheight,keepaspectratio]{genome_diversity_paper/fig3.png}
+  \end{center}
+\end{frame}
+
 
 \section*{References}