\setbeamercolor*{fine separation line}{}
\setbeamercovered{transparent}
\logo{\begin{tikzpicture}% Pale figure
- {\node[opacity=0.7]{\IfFileExists{./logo.pdf}{\includegraphics[height=1.5cm]{logo.pdf}}{}%
+ {\node[opacity=0.7]{\IfFileExists{./logo.pdf}{\includegraphics[height=1cm,width=1cm,keepaspectratio]{logo.pdf}}{}%
};}%
\end{tikzpicture}}
}
\subject{make for science}
\begin{document}
+\IfFileExists{./relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./relevant_xkcd.png}}}{}
+
\frame[plain]{\titlepage}
\mode<article>{\maketitle}
\item Gradle (Rake DSL)
\item Leiningen (Clojure)
\item Tweaker (task definitions in any language)
+ \item Ruffus (Pipeline library for python)
\item
\href{https://en.wikipedia.org/wiki/List_of_build_automation_software}{Wikipedia
List of build automation software}
\end{itemize}
\end{frame}
+\subsection{Why use GNU make?}
+\begin{frame}[fragile]{Why use GNU make?}
+ \begin{itemize}
+ \item Ubiquitous -- any machine which you can run command line tools
+ on has GNU make available.
+ \item Large community -- lots of people use GNU make. It's not going
+ to go away tomorrow.
+ \item Simple rules -- all of the rules are in a simple text file
+ which is easily edited and version controlled
+ \item Reasonable debugging -- you can see the commands that make is
+ going to run fairly easily: \mintinline{shell}{make -n target;}
+ \item Parallel -- make can make targets in parallel:
+ \mintinline{shell}{make -j8 all;}
+ \item Language agnostic -- make doesn't care what language your code
+ is written in
+ \end{itemize}
+\end{frame}
+
\section{Introduction to Makefiles}
\begin{frame}[fragile]{Simple Makefile}
value is assigned at the moment the variable is created
\end{itemize}
\item Variables can come from the environment and can be overridden on
- the command line: \mintinline{shell}{make FOO=bleargh} or
- \mintinline{shell}{FOO=blah make}.
+ the command line: \mintinline{shell}{FOO=blah make} or
+ \mintinline{shell}{make FOO=bleargh}.
\item \mintinline{make}{$@} -- target name %$
\item \mintinline{make}{$*} -- current stem %$
\item \mintinline{make}{$^} -- all prerequisites %$
\begin{frame}[fragile]{Some Functions}
\begin{itemize}
- \item \mintinline{make}{$(patsubst %.bam,%.sam,foo.sam bar.sam)} %$
+ \item \mintinline{make}{$(patsubst %.sam,%.bam,foo.sam bar.sam)} %$
-- returns foo.bam bar.bam.
\item \mintinline{make}{$(filter-out %.bam,foo.sam bar.bam)} %$
-- returns foo.sam
\begin{frame}[fragile]{Special Targets}
\begin{minted}[showtabs]{make}
-%.fasta.gz: %.fasta
+%.fasta: %.fasta.gz
gzip -dc $< > $@
%.bam: %.sam
\subsection{This Presentation}
\begin{frame}[fragile]{How this presentation is made}
-\inputminted[showtabs]{make}{Makefile}
+\inputminted[showtabs,breaklines,firstline=3]{make}{Makefile}
+\end{frame}
+\begin{frame}[fragile]{How this presentation is made}
\begin{itemize}
- \item all is the default target
+ \item all is the default
+ \item Download the optional relevant\_xkcd.png
\item Make .tex files from the knitr source.
\item The third rule uses latexmk to build the pdf using \XeLaTeX.
\end{itemize}
\begin{frame}[fragile]{Calling records from SRA: Downloading}
\begin{minted}[showtabs,breaklines]{make}
SRX=SRX007165
-SRRS=SRR020291 SRR020290 SRR020288 SRR020287 SRR020289
+SRRS=SRR020291 SRR020290
NREADS=1
SRR_FILES=$(patsubst %,%.sra,$(SRRS))
$(SRR_FILES): %.sra:
rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$(shell echo -n $*|sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@;
-\end{minted} %$
+\end{minted}
+%$
\begin{itemize}
\item First three lines are actually generated by other code and
included
endif
make_fastq: $(FASTQ_FILES)
+\end{minted}
+%$
+\begin{itemize}
+\item Use ifeq/else/endif to handle paired reads differently from
+ unpaired reads
+\item FASTQ\_FILES is the full set of fastq files dumped from the SRAs.
+\end{itemize}
+\end{frame}
+\begin{frame}[fragile]{Calling records from SRA: Dumping fastq \#2}
+\begin{minted}[showtabs,breaklines]{make}
ifeq ($(NREADS),1)
$(FASTQ_FILES): %.fastq.gz: %.sra
else
endif
$(MODULE) load sratoolkit/2.3.5-2; \
fastq-dump --split-3 --gzip $^;
-\end{minted} %$
+\end{minted}
+%$
\begin{itemize}
- \item Call fastq-dump to dump the fastq files
\item Handles NREADS of 1 and 2 differently
+ \item Call fastq-dump to dump the fastq files
\end{itemize}
\end{frame}
--readFilesCommand "gzip -dc" \
--readFilesIn $(TOPHAT_FASTQ_ARGUMENT);
ln $(SRX)_star/Aligned.*.bam $@ -s
-\end{minted} %$
+\end{minted}
+%$
\begin{itemize}
\item Call STAR with lots of options to do the alignment
\end{itemize}
for file in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \
mv $${file} $(SRX)_$${file}; \
done;
-\end{minted} %$
+\end{minted}
+%$
\begin{itemize}
\item Use cufflinks to call transcripts and estimate their expression (FPKM)
\end{itemize}
\begin{itemize}
\item Timestamps, not MD5sums
\item Complicated workflows
+ \item Interaction of rules can be complicated to understand
+ \item Yet Another Language
\end{itemize}
\end{frame}
\subsection{Timestamps}
-\begin{frame}{Dealing with timestamps}
- \begin{itemize}
- \item Make builds things on the basis of timestamps
- \item But what if the contents haven't changed and it's expensive to
- rebuild?
- \item Use md5sum!
- \end{itemize}
-\begin{minted}[showtabs,breaklines]{make}
+\begin{frame}[fragile]{Dealing with timestamps}
+ \begin{minted}[showtabs,breaklines]{make}
TARGET: PREREQ1 PREREQ2
- if [ -e $@.target.md5sum ] && [ -e $@ ] && \
- md5sum --status --check $@.target.md5sum; \
- then \
+ if [ -e $@.tgt.md5sum ] && [ -e $@ ] \
+ && md5sum --status --check \
+ $@.tgt.md5sum; then \
touch $@; \
else \
- RECIPE FOR $@; \
- md5sum $^ > $@.target.md5sum; \
+ RECIPE FOR $@; \
+ md5sum $^ > $@.tgt.md5sum; \
fi;
\end{minted}
+% $
+\begin{itemize}
+\item Make builds things on the basis of timestamps
+\item But what if the contents haven't changed and it's expensive to
+ rebuild?
+\item Use md5sum!
+\end{itemize}
\end{frame}
\subsection{Complicated Workflows}
-\begin{frame}{Complicated Workflow}
-
+\begin{frame}[fragile]{What about complicated workflows?}
+ \begin{itemize}
+ \item If your workflow is really complicated, what then?
+ \item Use some other language to write your workflow in
+ \item Use a simple makefile which just runs the workflow
+ \end{itemize}
+ \begin{minted}[showtabs,breaklines]{make}
+complicated_workflow_done: req1 req2 req3
+ ./complicated_workflow.sh $^;
+ touch $@;
+\end{minted}
\end{frame}
\section{Further Resources}
\begin{frame}{Further Resources}
+ \begin{itemize}
+ \item GNU Make Manual: \url{https://www.gnu.org/software/make/manual/}
+ \item Mailing lists: \url{http://www.gnu.org/software/make/}
+ \item Stack Overflow: \url{http://stackoverflow.com/questions/tagged/make}
+ \item Myself: \href{mailto:don@donarmstrong.com}{don@donarmstrong.com}
+ \item This presentation: \url{http://git.donarmstrong.com/using\_make\_for\_science.git}
+ \end{itemize}
\end{frame}
\end{document}