1 \documentclass[ignorenonframetext]{beamer}
3 \setmainfont{FreeSerif}
9 \usepackage[bf]{caption}
14 % \usepackage{multirow}
17 \usepackage[backend=biber,natbib=true,hyperref=true,style=nature]{biblatex}
18 \addbibresource{references.bib}
19 % \usepackage[nomargin,inline,draft]{fixme}
20 % \newcommand{\DLA}[1]{\textcolor{red}{\fxnote{DLA: #1}}}
21 % \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
25 \usepackage{zref-xr,zref-user}
26 \renewcommand*{\bibfont}{\tiny}
28 % The textpos package is necessary to position textblocks at arbitrary
29 % places on the page. Use showboxes option to show outlines of textboxes.
30 % \usepackage[absolute]{textpos}
31 \usepackage[absolute,overlay]{textpos}
32 \usepackage{mathtools,cancel}
34 \renewcommand{\CancelColor}{\color{red}} %change cancel color to red
35 \newenvironment{digression}[1]{\begin{textblock*}{64mm}(0.6\textwidth,0.2\textheight)%
37 \end{block}\end{textblock*}}
44 \usetheme{CambridgeUS}
46 % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
47 \definecolor{ilboldblue}{HTML}{002058}
48 \definecolor{ilboldorange}{HTML}{E87722}
49 \definecolor{ilblue}{HTML}{606EB2}
50 \definecolor{ilorange}{HTML}{D45D00}
51 \logo{\begin{tikzpicture}% Pale figure
52 {\node[opacity=0.6]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
57 \title[MASH]{Mash: fast genome and metagenome distance estimation}
58 \author[Don Armstrong]{Don L. Armstrong}
59 \institute[IGB]{Institute for Genomic Biology, Computing Genomes
60 for Reproductive Health, University of Illinois, Urbana-Champaign}
64 <<load.libraries,echo=FALSE,results="hide",warning=FALSE,message=FALSE,error=FALSE,cache=FALSE>>=
65 opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio")
66 #opts_chunk$set(cache=TRUE, autodep=TRUE)
67 options(device = function(file, width = 8, height = 7, ...) {
68 cairo_pdf(tempfile(), width = width, height = height, ...)
79 \IfFileExists{figures/relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{figures/relevant_xkcd.png}
81 \url{https://xkcd.com/1691/}}}
83 \frame[plain]{\titlepage
85 Code and slides are here:
87 \qrcode[padding]{http://dla2.us/p/mashminhash2016}
89 \url{http://dla2.us/p/mashminhash2016}
96 \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/paper_frontpage}
100 \begin{frame}{MinHash Algorithm}
102 \column{0.3\textwidth}
103 \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_1.png}
104 \column{0.7\textwidth}
106 \item Decompose dataset into k-mers
108 \begin{digression}{What about strandedness?}
110 \item Take lexically lowest sequence
112 \item Given the 7-mer 5'-ACTGCAC-3' and its reverse complement, 5'-GTGCAGT-3'
116 \item Because $S(A \cup B)$ is a random sample of $A \cup B$
117 the fraction of elements in $S(A \cup B)$ which are shared
118 by $S(A)$ and $S(B)$ is an unbiased estimate of $J(A,B)$
119 \item $J(A,B) \approx \frac{|S(A \cup B) \cap S(A) \cap S(B)|}{|S(A \cup B)|}$
123 \item Hash k-mers (32/64bit)
124 \item Estimate Jaccard Index $J(A,B)$
126 \begin{digression}{Estimating Jaccard Index}
127 $J(A,B) = \frac{|A \cap B|}{|A \cup B|}$
129 \item Sample randomly from $A$ and $B$
130 \item Because $S(A \cup B)$ is a random sample of $A \cup B$
131 the fraction of elements in $S(A \cup B)$ which are shared
132 by $S(A)$ and $S(B)$ is an unbiased estimate of $J(A,B)$
133 \item $J(A,B) \approx \frac{|S(A \cup B) \cap S(A) \cap S(B)|}{|S(A \cup B)|}$
137 \item<3-> How can we randomly sample?
138 \item<3-> Properties of the hash!
143 \begin{frame}{Hash functions}
147 \begin{frame}{MurmurHash3 -- properties}
151 \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_2.png}
155 \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_3.png}
159 \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_4.png}
163 \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_5.png}
167 \section*{References}
169 \begin{frame}[plain]{References}
171 \mbox{}\vspace{-\baselineskip}
172 \printbibliography[heading=none]