]> git.donarmstrong.com Git - mash_minhash_presentation.git/blob - hpcbio_mash_minhash_jun_2016.Rnw
9b132641dcce6c9e4cd85424204ef9da27d1d4c8
[mash_minhash_presentation.git] / hpcbio_mash_minhash_jun_2016.Rnw
1 \documentclass[ignorenonframetext]{beamer}
2 \usepackage{fontspec}
3 \setmainfont{FreeSerif}
4 \setsansfont{FreeSans}
5 \setmonofont{FreeMono}
6 \usepackage{url}
7 \usepackage{fancyhdr}
8 \usepackage{graphicx}
9 \usepackage[bf]{caption}
10 \usepackage{rotating}
11 \usepackage{wrapfig}
12 \usepackage{fancybox}
13 \usepackage{booktabs}
14 % \usepackage{multirow}
15 \usepackage{acronym}
16 \usepackage{qrcode}
17 \usepackage[backend=biber,natbib=true,hyperref=true,style=nature]{biblatex}
18 \addbibresource{references.bib}
19 % \usepackage[nomargin,inline,draft]{fixme}
20 % \newcommand{\DLA}[1]{\textcolor{red}{\fxnote{DLA: #1}}}
21 % \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
22 \usepackage{texshade}
23 \usepackage{tikz}
24 \usepackage{nameref}
25 \usepackage{zref-xr,zref-user}
26 \renewcommand*{\bibfont}{\tiny}
27
28 % The textpos package is necessary to position textblocks at arbitrary 
29 % places on the page.  Use showboxes option to show outlines of textboxes.
30 % \usepackage[absolute]{textpos}
31 \usepackage[absolute,overlay]{textpos}
32 \usepackage{mathtools,cancel}
33
34 \renewcommand{\CancelColor}{\color{red}} %change cancel color to red
35 \newenvironment{digression}[1]{\begin{textblock*}{64mm}(0.6\textwidth,0.2\textheight)%
36     \begin{block}{#1}}{%
37 \end{block}\end{textblock*}}
38
39
40 \usepackage{multirow}
41 \usepackage{array}
42
43 \mode<presentation>{ 
44   \usetheme{CambridgeUS}
45   \usecolortheme{crane}
46   % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
47   \definecolor{ilboldblue}{HTML}{002058}
48   \definecolor{ilboldorange}{HTML}{E87722}
49   \definecolor{ilblue}{HTML}{606EB2}
50   \definecolor{ilorange}{HTML}{D45D00}
51   \logo{\begin{tikzpicture}% Pale figure
52       {\node[opacity=0.6]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
53         };}%
54     \end{tikzpicture}}
55 }
56
57 \title[MASH]{Mash: fast genome and metagenome distance estimation}
58 \author[Don Armstrong]{Don L. Armstrong}
59 \institute[IGB]{Institute for Genomic Biology, Computing Genomes 
60   for Reproductive Health, University of Illinois, Urbana-Champaign}
61
62 \begin{document}
63
64 <<load.libraries,echo=FALSE,results="hide",warning=FALSE,message=FALSE,error=FALSE,cache=FALSE>>=
65 opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio") # global knitr defaults: cairo_pdf device, figures scaled to fit a slide
66 #opts_chunk$set(cache=TRUE, autodep=TRUE)
67 options(device = function(file, width = 8, height = 7, ...) { # default graphics device: cairo_pdf writing to a throwaway temp file
68   cairo_pdf(tempfile(), width = width, height = height, ...)
69 })
70 options(digits=2)
71 library("data.table")
72 library("ggplot2")
73 library("reshape2")
74 library("grid")
75 library("xtable")
76 @
77
78
79 \IfFileExists{figures/relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{figures/relevant_xkcd.png}
80
81     \url{https://xkcd.com/1691/}}}{}% explicit empty else-branch: emit no slide when the image is absent
82
83 \frame[plain]{\titlepage
84   \begin{center}
85     Code and slides are here: 
86     
87     \qrcode[padding]{http://dla2.us/p/mashminhash2016}
88     
89     \url{http://dla2.us/p/mashminhash2016}
90    \end{center}
91  }
92
93
94 \frame[plain]{
95   \begin{center}
96   \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/paper_frontpage}
97 \end{center}
98 }
99
100 \begin{frame}{MinHash Algorithm} % figure on the left, stepwise outline on the right; overlays 2 and 3 pop up digression boxes
101   \begin{columns}
102     \column{0.3\textwidth}
103     \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_1.png}
104     \column{0.7\textwidth}
105     \begin{itemize}
106     \item Decompose dataset into k-mers
107       \only<2>{
108         \begin{digression}{What about strandedness}
109           \begin{itemize} 
110           \item Take lexically lowest sequence
111             \begin{itemize}
112             \item Given 7-mers 5'-ACTGCAC-3' and its reverse complement, 5'-GTGCAGT-3'
113             \item A $<$ G
114             \item Use ACTGCAC
115             \end{itemize}
116           \item Because $S(A \cup B)$ is a random sample of $A \cup B$ % NOTE(review): this item and the next duplicate the Jaccard digression below; likely copy-paste leftovers -- confirm they belong here
117             the fraction of elements in $S(A \cup B)$ which are shared
118             by $S(A)$ and $S(B)$ is an unbiased estimate of $J(A,B)$
119           \item $J(A,B) \approx \frac{|S(A \cup B) \cap S(A) \cap S(B)|}{|S(A \cup B)|}$
120           \end{itemize}
121         \end{digression}
122       }
123     \item Hash k-mers (32/64bit)
124     \item Estimate Jaccard Index $J(A,B)$
125       \only<3>{
126         \begin{digression}{Estimating Jaccard Index}
127           $J(A,B) = \frac{|A \cap B|}{|A \cup B|}$ 
128           \begin{itemize} 
129           \item Sample randomly from $A$ and $B$
130           \item Because $S(A \cup B)$ is a random sample of $A \cup B$
131             the fraction of elements in $S(A \cup B)$ which are shared
132             by $S(A)$ and $S(B)$ is an unbiased estimate of $J(A,B)$
133           \item $J(A,B) \approx \frac{|S(A \cup B) \cap S(A) \cap S(B)|}{|S(A \cup B)|}$
134           \end{itemize}
135         \end{digression}
136       }
137     \item<3-> How can we randomly sample?
138     \item<3-> Properties of the hash!
139     \end{itemize}
140   \end{columns}
141 \end{frame}
142
143 \begin{frame}{Hash functions}
144   
145 \end{frame}
146
147 \begin{frame}{MurmurHash3 -- properties}
148 \end{frame}
149
150 \begin{frame}
151   \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_2.png}
152 \end{frame}
153
154 \begin{frame}
155   \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_3.png}
156 \end{frame}
157
158 \begin{frame}
159   \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_4.png}
160 \end{frame}
161
162 \begin{frame}
163   \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{mash_minhash_paper/fig_5.png}
164 \end{frame}
165
166
167 \section*{References}
168
169 \begin{frame}[plain]{References}
170   \begin{center}
171     \mbox{}\vspace{-\baselineskip}
172     \printbibliography[heading=none]
173   \end{center}
174 \end{frame}
175
176 \end{document}