+\begin{frame}{Sampling}
+ \begin{itemize}
+ \item 142 populations from Africa, America, Oceania, South Asia,
+ East Asia, and West Eurasia (mostly indigenous)
+ \item 300 samples sequenced at 34--83-fold coverage by Illumina
+ \item Aligned using BWA-MEM
+ \item Genotyped using a special version of GATK and FermiKit
+ \item Data available in EBI ($n=279$, PRJEB9586) and dbGaP ($n=21$, ?)
+ \end{itemize}
+\end{frame}
+
+
+\begin{frame}[fragile]{Alignment Pipeline}
+ \begin{itemize}
+ \item Aligned to the ``decoy'' version of the human reference
+ (hs37d5); supposedly improves alignment in misassembled regions or
+ regions with CNVs?
+ \item PCR-free data, though they marked optical duplicates
+ using samblaster
+ \end{itemize}
+\begin{minted}{bash}
+./htscmd bamshuf -Oun128 in.bam tmp-pre \
+| ./htscmd bam2fq -as aln-se.fq.gz - \
+| ./trimadap \
+| ./bwa mem -pt8 hs37d5.fa - \
+| ./samblaster \
+| samtools view -uS - \
+| samtools sort -@4 -m512M - out-pre
+\end{minted}
+\end{frame}
+
+\begin{frame}[fragile]{Genotyping}
+ \begin{itemize}
+ \item Reference bias: for novel variants, GATK assumes the reference is
+ more likely, which may not be the case. Use prior of
+ $(0.4995,0.001,0.4995)$ instead of default
+ $(0.9985,0.001,0.0005)$.
+ \begin{itemize}
+ \item Unclear what the effect of this change is on the calling
+ \item Maybe worth thinking about?
+ \end{itemize}
+ \item Also used FermiKit; apparently has comparable call rates to
+ GATK and Platypus
+ \end{itemize}
+\begin{minted}{bash}
+ java -Xmx2g -jar GenomeAnalysisTK.jar \
+ -T UnifiedGenotyper -I srt.aln.bam \
+ -L CHR_ID -R hs37d5.fa -dcov 600 -glm SNP \
+ -out_mode EMIT_ALL_SITES -stand_call_conf 5.0 \
+ -stand_emit_conf 5.0 -inputPrior 0.0010 \
+ -inputPrior 0.4995 -D dbsnp_138.b37.vcf \
+ -o CHR_ID.vcf -A GCContent -A BaseCounts
+\end{minted}
+\end{frame}
+
+\begin{frame}{FermiKit vs Platypus vs GATK}
+ \begin{columns}
+ \column{0.5\textwidth}
+ \includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{genome_diversity_paper/supplemental-014.png}
+ \column{0.5\textwidth}
+ FermiKit and Platypus call 3.17M more sites than GATK, but it is unclear
+ whether those are real sites or not; they go into this in much more
+ detail than I've digested yet.
+\end{columns}
+\end{frame}
+