--- /dev/null
+\documentclass[ignorenonframetext]{beamer}
+\usepackage{fontspec}
+\setmainfont{FreeSerif}
+\setsansfont{FreeSans}
+\setmonofont{FreeMono}
+\usepackage{url}
+\usepackage{fancyhdr}
+\usepackage{graphicx}
+\usepackage[bf]{caption}
+\usepackage{rotating}
+\usepackage{wrapfig}
+\usepackage{fancybox}
+\usepackage{booktabs}
+% \usepackage{multirow}
+\usepackage{acronym}
+\usepackage{qrcode}
+\usepackage[backend=biber,natbib=true,hyperref=true,style=nature]{biblatex}
+\addbibresource{references.bib}
+% \usepackage[nomargin,inline,draft]{fixme}
+% \newcommand{\DLA}[1]{\textcolor{red}{\fxnote{DLA: #1}}}
+% \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
+\usepackage{texshade}
+\usepackage{tikz}
+\usepackage{nameref}
+\usepackage{zref-xr,zref-user}
+\renewcommand*{\bibfont}{\tiny}
+
+% The textpos package is necessary to position textblocks at arbitrary
+% places on the page. Use showboxes option to show outlines of textboxes.
+% \usepackage[absolute]{textpos}
+\usepackage[absolute,overlay]{textpos}
+\usepackage{mathtools,cancel}
+
+\renewcommand{\CancelColor}{\color{red}} %change cancel color to red
+% digression: a titled beamer block placed inside a textpos textblock*.
+%   #1 -- title passed to the inner block environment.
+% The box is 64mm wide and anchored at (0.6\textwidth, 0.2\textheight);
+% every line ends in % so that no stray spaces enter the definition.
+\newenvironment{digression}[1]{%
+  \begin{textblock*}{64mm}(0.6\textwidth,0.2\textheight)%
+  \begin{block}{#1}%
+}{%
+  \end{block}%
+  \end{textblock*}%
+}
+
+
+\usepackage{multirow}
+\usepackage{array}
+
+\mode<presentation>{ % theme, colour and logo setup, applied in presentation mode only
+ \usetheme{CambridgeUS}
+ \usecolortheme{crane}
+ % Named colours defined below; values presumably follow the palette at http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
+ \definecolor{ilboldblue}{HTML}{002058}
+ \definecolor{ilboldorange}{HTML}{E87722}
+ \definecolor{ilblue}{HTML}{606EB2}
+ \definecolor{ilorange}{HTML}{D45D00}
+ \logo{\begin{tikzpicture}% Pale figure: the logo is a single TikZ node drawn with opacity=0.6
+ {\node[opacity=0.6]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}% empty false branch: node stays empty when the file is missing
+ };}%
+ \end{tikzpicture}}
+}
+
+\title[MASH]{Mash: fast genome and metagenome distance estimation}
+\author[Don Armstrong]{Don L. Armstrong}
+\institute[IGB]{Institute for Genomic Biology, Computing Genomes
+ for Reproductive Health, University of Illinois, Urbana-Champaign}
+
+\begin{document}
+
+<<load.libraries,echo=FALSE,results="hide",warning=FALSE,message=FALSE,error=FALSE,cache=FALSE>>=
+## Defaults for every later chunk: draw figures with cairo_pdf and scale them
+## to the text width, capped at 0.8 of the text height, keeping aspect ratio.
+opts_chunk$set(
+  dev        = "cairo_pdf",
+  out.width  = "\\textwidth",
+  out.height = "0.8\\textheight",
+  out.extra  = "keepaspectratio"
+)
+#opts_chunk$set(cache=TRUE, autodep=TRUE)
+
+## Session options:
+##  - device: default graphics device; it ignores the requested file name and
+##    writes to a temporary file through cairo_pdf instead.
+##  - digits: number of significant digits used when printing numbers.
+options(
+  device = function(file, width = 8, height = 7, ...) {
+    cairo_pdf(tempfile(), width = width, height = height, ...)
+  },
+  digits = 2
+)
+
+## Packages are attached in the same order as before (order affects masking).
+library(data.table)
+library(ggplot2)
+library(reshape2)
+library(grid)
+library(xtable)
+
+@
+
+% Optional opening slide, emitted only when the image file is present.
+% \IfFileExists takes a true branch and a false branch; the false branch is
+% given explicitly (empty) so the macro cannot absorb whatever token follows
+% it in the file.
+\IfFileExists{figures/relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{figures/relevant_xkcd.png}
+
+ \url{https://xkcd.com/1691/}}}{}
+
+\frame[plain]{\titlepage
+ \begin{center}
+ Code and slides are here:
+
+ \qrcode[padding]{http://dla2.us/p/mashminhash2016}
+
+ \url{http://dla2.us/p/mashminhash2016}
+ \end{center}
+ }
+
+
+\frame[plain]{
+ \begin{center}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/paper_frontpage}
+\end{center}
+}
+
+\begin{frame}{MinHash Algorithm}
+ \begin{columns}
+ \column{0.3\textwidth}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_1.png}
+ \column{0.7\textwidth}
+ \begin{itemize}
+ \item Decompose dataset into k-mers
+ \only<2>{
+ \begin{digression}{What about strandedness}
+ \begin{itemize}
+ \item Take lexically lowest sequence
+ \begin{itemize}
+ \item Given 7-mers 5'-ACTGCAC-3' and its reverse complement, 5'-GTGCAGT-3'
+ \item A $<$ G
+ \item Use ACTGCAC
+ \end{itemize}
+ \item Because $S(A \cup B)$ is a random sample of $A \cup B$
+ the fraction of elements in $S(A \cup B)$ which are shared
+ by $S(A)$ and $S(B)$ is an unbiased estimate of $J(A,B)$
+ \item $J(A,B) \approx \frac{|S(A \cup B) \cap S(A) \cap S(B)|}{|S(A \cup B)|}$
+ \end{itemize}
+ \end{digression}
+ }
+ \item Hash k-mers (32/64bit)
+ \item Estimate Jaccard Index $J(A,B)$
+ \only<3>{
+ \begin{digression}{Estimating Jaccard Index}
+ $J(A,B) = \frac{|A \cap B|}{|A \cup B|}$
+ \begin{itemize}
+ \item Sample randomly from $A$ and $B$
+ \item Because $S(A \cup B)$ is a random sample of $A \cup B$
+ the fraction of elements in $S(A \cup B)$ which are shared
+ by $S(A)$ and $S(B)$ is an unbiased estimate of $J(A,B)$
+ \item $J(A,B) \approx \frac{|S(A \cup B) \cap S(A) \cap S(B)|}{|S(A \cup B)|}$
+ \end{itemize}
+ \end{digression}
+ }
+ \item<3-> How can we randomly sample?
+ \item<3-> Properties of the hash!
+ \end{itemize}
+ \end{columns}
+\end{frame}
+
+\begin{frame}{Hash functions}
+
+\end{frame}
+
+\begin{frame}{MurmurHash3 -- properties}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_2.png}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_3.png}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_4.png}
+\end{frame}
+
+\begin{frame}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_5.png}
+\end{frame}
+
+
+\section*{References}
+
+\begin{frame}[plain]{References}
+ \begin{center}
+ \mbox{}\vspace{-\baselineskip}
+ \printbibliography[heading=none]
+ \end{center}
+\end{frame}
+
+\end{document}