% The textpos package is necessary to position textblocks at arbitrary
% places on the page. Use showboxes option to show outlines of textboxes.
% \usepackage[absolute]{textpos}
-\usepackage[absolute,showboxes]{textpos}
+\usepackage[absolute,overlay]{textpos}
+\usepackage{mathtools,cancel}
+
+\renewcommand{\CancelColor}{\color{red}} %change cancel color to red
\usepackage{multirow}
\usepackage{array}
\mode<presentation>{
\usetheme{CambridgeUS}
+ \usecolortheme{crane}
% http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
\definecolor{ilboldblue}{HTML}{002058}
\definecolor{ilboldorange}{HTML}{E87722}
\definecolor{ilblue}{HTML}{606EB2}
\definecolor{ilorange}{HTML}{D45D00}
- \setbeamercolor{alerted text}{fg=ilboldorange}
- \setbeamercolor*{palette primary}{fg=ilblue,bg=ilorange}
- \setbeamercolor*{palette secondary}{fg=ilblue!20!white,bg=ilorange}
- \setbeamercolor*{palette tertiary}{bg=ilblue,fg=ilorange}
- \setbeamercolor*{palette quaternary}{fg=ilblue,bg=ilorange}
-
- \setbeamercolor*{sidebar}{fg=ilorange,bg=ilboldblue}
-
- \setbeamercolor*{palette sidebar primary}{fg=ilblue!10!white,bg=ilorange}
- \setbeamercolor*{palette sidebar secondary}{fg=ilorange}
- \setbeamercolor*{palette sidebar tertiary}{fg=ilblue}
- \setbeamercolor*{palette sidebar quaternary}{fg=ilorange}
-
- % \setbeamercolor*{titlelike}{parent=palette primary}
- \setbeamercolor{titlelike}{parent=palette primary,fg=ilboldblue,bg=ilorange}
- \setbeamercolor{frametitle}{fg=ilboldorange,bg=ilblue!80!white}
- \setbeamercolor{frametitle right}{fg=ilboldblue,bg=ilorange}
-
- \setbeamercolor*{separation line}{}
- \setbeamercolor*{fine separation line}{}
- \setbeamercovered{transparent}
\logo{\begin{tikzpicture}% Pale figure
- {\node[opacity=0.8]{\IfFileExists{figures/igb_wordmark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/igb_wordmark}}{}%
+ {\node[opacity=0.6]{\IfFileExists{figures/uofi_mark.pdf}{\includegraphics[width=2cm,height=1cm,keepaspectratio]{figures/uofi_mark}}{}%
};}%
\end{tikzpicture}}
}
\url{https://xkcd.com/1513/}}}
-\frame[plain]{\titlepage}
+\frame[plain]{\titlepage
+ \begin{center}
+ Code and slides are here:
+
+ \qrcode[padding]{http://dla2.us/p/dmnd2015}
+
+ \url{http://dla2.us/p/dmnd2015}
+ \end{center}
+ }
\section{The Problem}
+\begin{frame}{The Problem}
+ \begin{itemize}
+ \item mRNA sequences
+ \item No clear reference genome/database
+ \item How to annotate them?
+ \end{itemize}
+ \only<2>{
+ \begin{textblock*}{64mm}(32mm,0.15\textheight)
+ \begin{exampleblock}{No Reference Genome}
+ \begin{center}
+ \includegraphics[height=0.3\textheight,keepaspectratio]{figures/spalax.jpg}
+ \end{center}
+ \begin{itemize}
+ \item Blind mole rat (\textit{Spalax})
+ \item No reference genome
+ \item Close to mouse/rat, but not close enough to use
+ mouse/rat directly
+ \item Placenta sequences; how to annotate them?
+ \end{itemize}
+ \end{exampleblock}
+ \end{textblock*}
+ }
+ \only<3>{
+ \begin{textblock*}{64mm}(32mm,0.25\textheight)
+ \begin{exampleblock}<3>{Environmental Samples}
+ \begin{itemize}
+ \item Permafrost
+ \item Many un-cultured, un-sequenced bacteria
+ \item How to annotate what genes they are expressing?
+ \end{itemize}
+ \end{exampleblock}
+ \end{textblock*}
+ }
+\end{frame}
+
+\begin{frame}{Basic Solution}
+ \begin{itemize}
+ \item Annotate mRNA against proteins in the nr or in a suitable
+ reference proteome
+ \item Six possible translations of nucleotide into amino acid
+ \item Take all possible sub-sequences
+ \item Hash into reference, extend match
+ \item Pick best match
+ \end{itemize}
+\end{frame}
\section{The Previous Contenders}
+\begin{frame}{Previous Contenders}
+ \begin{itemize}
+ \item Blastx
+ \item Rapsearch
+ \item mBlast
+ \item many, many, more
+ \end{itemize}
+\end{frame}
+
+
+\section{Diamond Advances}
+\begin{frame}{Diamond Methodology Advances}
+ \begin{itemize}
+ \item Seed and Extend
+ \item Reduced Alphabet
+ \item Spaced Seeds with Specific Seed Shape
+ \item Double Indexing
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Seed and Extend}
+ \begin{itemize}
+ \item Calculate an index
+ \item Look up matching indices in the database
+ \item Local string alignment using Smith-Waterman
+ \item Looks like blast, right?
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Reduced Alphabet}
+
+ \begin{itemize}
+ \item \colorbox{red}{LVI} \colorbox{red!10!white}{M} \colorbox{yellow}{C} \colorbox{black!50!white}{G} \colorbox{red!50!blue}{\textcolor{white}{STA}}
+ \colorbox{green}{P} \colorbox{red!50!blue!50!white}{F} \colorbox{red!50!blue!20!white}{Y} \colorbox{black}{\textcolor{white}{W}} \colorbox{blue}{\textcolor{white}{KREDNQ}}
+%- [KREDQN] [C] [G] [H] [ILV] [M] [F] [Y] [W] [P] [STA].
+ \item Smaller index sizes --- less memory usage
+ \item Greater sensitivity --- seed more likely to match
+ \item More likelihood of useless extensions --- only the seed matched
+ \end{itemize}
+ {\centering
+ \includegraphics[height=0.5\textheight,width=0.8\textwidth,keepaspectratio]{figures/reduced_alphabet.png}
+
+ \cite{Murphy.ea2000:Simplifiedaminoacidalphabets}\par
+ }
+
+\end{frame}
-\section{Diamond Methodology}
-\begin{frame}{Diamond Methodology}
+\begin{frame}{Spaced Seeds with Specific Seed Shape}
\begin{itemize}
- \item
- \end{itemize}
+ \item Spaced seeds are longer seeds in which only a subset of the
+ positions are used
+ \item For example, if
+ \begin{itemize}
+ \item the sequence was ABCDEFGHI
+ \item the seed shape was
+ 11100010
+ \item then you would query into the index with ABCG
+ \end{itemize}
+ \item Originally presented in PatternHunter~\cite{Ma.ea2002:PatternHunterfasterandmore}
+ \item Why is this better than consecutive seeds?
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Consecutive Seeds vs Spaced Seeds}
+ \begin{itemize}
+ \item Target Sequence: ABCDEFGHIJK
+ \item Sequenced Sequence: ABC\textcolor{red}{Z}EF\textcolor{red}{Y}HI\textcolor{red}{X}K
+ \item Seed Shape: 111001 (4) and Consecutive: 1111 (4)
+ \end{itemize}
+% ABCZEFGHIYK
+% ABCDEFGHIJK
+% 11000010---
+% -11000010--
+% --11000010-
+% ---11000010
+
+ \begin{columns}
+ \column{0.6\textwidth}
+\begin{block}{Pathological example}
+ \begin{tabular}{c c c}
+ Shift & Spaced & Consecutive \\
+ 0 & ABCF=ABCF & ABCD≠ABCZ \\
+ 1 & BCDG≠BCZY & BCDE≠BCZE \\
+ 2 & CDEH≠CZEH & CDEF≠CZEF \\
+ 3 & DEFI≠ZEFI & DEFG≠ZEFY \\
+ 4 & EFGJ≠EFYX & EFGH≠EFYH \\
+ 5 & FGHK≠FYHK & FGHI≠FYHI \\
+ 6 & & GHIJ≠YHIX \\
+ 7 & & HIJK≠HIXK \\
+ \end{tabular}
+\end{block}
+ \column{0.4\textwidth}
+ \begin{itemize}
+ \item Spaced seed matches once
+ \item Consecutive seed never matches
+ \item Consecutive seed does more comparisons and may match
+ repeatedly
+ \end{itemize}
+\end{columns}
+\end{frame}
+
+\begin{frame}{Optimal Spaced Seed}
+ \begin{itemize}
+ \item Fewest overlaps with shifted seed
+ \item Longer seeds are better
+ \item Equivalent weight
+ \item Use dynamic programming to calculate optimal seed for given
+ length
+ \end{itemize}
+ \begin{columns}
+ \column{0.6\textwidth}
+ \begin{block}{DIAMOND Seeds (Fast)}
+ \begin{itemize}
+ \item 111101011101111 (12)
+ \item 111011001100101111 (12)
+ \item 1111001001010001001111 (12)
+ \item 111100101000010010010111 (12)
+ \end{itemize}
+ \end{block}
+\end{columns}
+\end{frame}
+
+\begin{frame}{Double Indexing}
+ \begin{itemize}
+ \item Blastx indexes the database
+ \item Blastx runs the queries in input order
+ \item DIAMOND indexes both the database and the queries
+ \item DIAMOND runs queries in index order
+ \item Why is this faster?
+ \end{itemize}
+\end{frame}
+
+\begin{frame}{Double Indexing: Why it's faster}
+ \begin{itemize}
+ \item Cache architecture
+ \begin{itemize}
+ \item On CPU Cache -- L1,L2
+ \item Shared CPU Cache L3
+ \item Much faster than main memory
+ \end{itemize}
+ \item Each cache miss must go to main memory (through the northbridge),
+ which has significantly more latency than the CPU cache and takes
+ hundreds of cycles
+ \item Dictionary Example: Is it faster to look up
+ \begin{itemize}
+ \item “apple”, “xylophone”, “appliance”, “xylem”
+ \item or “apple”, “appliance”, “xylem”, “xylophone”?
+ \end{itemize}
+ \end{itemize}
+\end{frame}
+
+\section{Usage}
+
+\begin{frame}{DIAMOND Usage}
+ \begin{itemize}
+ \item Make the diamond database:
+ \texttt{diamond makedb --in foo.fasta --db foo.dmnd;}
+ \item Run the diamond query:
+ \texttt{diamond blastx --db foo.dmnd --threads 24 --query bar.fasta --daa bar\_diamond.txt}
+ \end{itemize}
\end{frame}
\section{Comparison}
\subsection{Speed}
+\begin{frame}{Speed of DIAMOND}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{diamond_paper_figures/diamond_F1_a.jpg}
+\end{frame}
+
\subsection{Accuracy}
+\begin{frame}{Accuracy of DIAMOND: Any success}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{diamond_paper_figures/diamond_F1_b.jpg}
+\end{frame}
+
+\begin{frame}{Accuracy of DIAMOND: Matches blastx}
+ \includegraphics[width=\textwidth,height=0.8\textheight,keepaspectratio]{diamond_paper_figures/diamond_F1_c.jpg}
+\end{frame}
+
\section*{References}