1 \documentclass[ignorenonframetext]{beamer}
2 \usepackage[Symbols,MiscellaneousSymbols]{ucharclasses}
5 \setmainfont{FreeSerif}
12 \usepackage{threeparttable}
13 \usepackage[backend=biber,natbib=true,hyperref=true,style=numeric-comp]{biblatex}
14 \bibliography{references}
15 \usepackage[nomargin,inline,draft]{fixme}
19 \usepackage{zref-xr,zref-user}
21 \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
23 \usepackage[x11names,svgnames,usenames,dvipsnames]{xcolor}
24 \usepackage[noxcolor]{beamerarticle}
27 \usepackage[bf]{caption}
32 \usepackage{adjustbox}
33 \usepackage{longtable}
35 \usepackage{pdflscape}
36 \usepackage[hyperfigures,bookmarks,colorlinks]{hyperref}
46 \usepackage{tcolorbox}
48 \BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
49 \AfterEndEnvironment{minted}{\end{tcolorbox}}%
52 \usetheme{CambridgeUS}
53 % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
54 \definecolor{ilboldblue}{HTML}{002058}
55 \definecolor{ilboldorange}{HTML}{E87722}
56 \definecolor{ilblue}{HTML}{606EB2}
57 \definecolor{ilorange}{HTML}{D45D00}
58 \setbeamercolor{alerted text}{fg=ilboldblue}
59 \setbeamercolor*{palette primary}{fg=ilblue,bg=ilorange}
60 \setbeamercolor*{palette secondary}{fg=ilblue!20!white,bg=ilorange}
61 \setbeamercolor*{palette tertiary}{bg=ilblue,fg=ilorange}
62 \setbeamercolor*{palette quaternary}{fg=ilblue,bg=ilorange}
64 \setbeamercolor*{sidebar}{fg=ilorange,bg=ilboldblue}
66 \setbeamercolor*{palette sidebar primary}{fg=ilblue!10!white,bg=ilorange}
67 \setbeamercolor*{palette sidebar secondary}{fg=ilorange}
68 \setbeamercolor*{palette sidebar tertiary}{fg=ilblue}
69 \setbeamercolor*{palette sidebar quaternary}{fg=ilorange}
71 % \setbeamercolor*{titlelike}{parent=palette primary}
72 \setbeamercolor{titlelike}{parent=palette primary,fg=ilboldblue,bg=ilorange}
73 \setbeamercolor{frametitle}{fg=ilboldorange,bg=ilblue!80!white}
74 \setbeamercolor{frametitle right}{fg=ilboldblue,bg=ilorange}
76 \setbeamercolor*{separation line}{}
77 \setbeamercolor*{fine separation line}{}
78 \setbeamercovered{transparent}
79 \logo{\begin{tikzpicture}% Pale figure
80 {\node[opacity=0.7]{\IfFileExists{./logo.pdf}{\includegraphics[height=1cm,width=1cm,keepaspectratio]{logo.pdf}}{}%
85 \title{Using make for science}
87 \author{Don Armstrong}
89 \subject{make for science}
92 \IfFileExists{./relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./relevant_xkcd.png}}}
94 \frame[plain]{\titlepage}
96 \mode<article>{\maketitle}
98 \section{What make was made for}
99 \begin{frame}{What was make originally made to do?}
101 \item Compiling and installing software from source
102 \item Replacement of operating system specific compilation and
103 installation shell scripts
104 \item Re-compile when dependencies of the software were modified
108 \subsection{Brief history of makes}
110 \begin{frame}{Brief history of make-alikes}
113 \href{http://pubs.opengroup.org/onlinepubs/009695399/utilities/make.html}{POSIX
114 Make} (standardization of basic features of make)
115 \item \href{http://www.gnu.org/software/make/manual/}{GNU Make}
116 (standard make on Linux and OS X)
117 \item \href{https://www.freebsd.org/cgi/man.cgi?query=make(1)}{BSD
118 Make} (pmake or bmake)
120 \href{https://msdn.microsoft.com/en-us/library/dd9y37ha.aspx}{nmake}
121 (part of Visual Studio)
122 \item \href{http://plan9.bell-labs.com/sys/doc/mk.html}{Mk} (Plan 9
127 \subsection{Other solutions in this problem space}
129 \begin{frame}{Other non-make dependency builders}
131 \item Ant (popular for Java software)
132 \item Cabal (popular for Haskell)
133 \item Maven (also Java)
134 \item Rake (Ruby build tool)
135 \item Gradle (Groovy DSL)
136 \item Leiningen (Clojure)
137 \item Tweaker (task definitions in any language)
138 \item Ruffus (pipeline library for Python)
140 \href{https://en.wikipedia.org/wiki/List_of_build_automation_software}{Wikipedia
141 List of build automation software}
145 \subsection{Why use GNU make?}
146 \begin{frame}{Why use GNU make?}
148 \item Ubiquitous -- any machine on which you can run command-line tools
149 has GNU make available.
150 \item Large community -- lots of people use GNU make. It's not going
152 \item Simple rules -- all of the rules are in a simple text file
153 which is easily edited and version controlled
154 \item Reasonable debugging -- you can see the commands that make is
155 going to run fairly easily: \mintinline{shell}{make -n target;}
156 \item Parallel -- make can make targets in parallel:
157 \mintinline{shell}{make -j8 all;}
158 \item Language agnostic -- make doesn't care what language your code
163 \section{Introduction to Makefiles}
165 \begin{frame}[fragile]{Simple Makefile}
166 \begin{minted}[showtabs]{make}
168 echo "hello world" > hello_world
173 \subsection{General Syntax}
175 \begin{frame}[fragile]{General Syntax}
176 \begin{minted}[showtabs]{make}
177 TARGETS: PREREQUISITES
181 \item TARGETS are file names separated by spaces
182 \item PREREQUISITES are file names separated by spaces.
183 \item RECIPE lines start with a tab, are executed by the shell and
184 describe how to make the TARGETS (generally from the PREREQUISITES)
185 \item A TARGET is out of date if it does not exist or if it is older
186 than any of the prerequisites.
190 \subsection{Variables}
192 \begin{frame}[fragile]{Some Variables}
194 \item Two flavors of variables
196 \item \mintinline{make}{FOO=bar} -- recursively expanded variables;
197 references to other variables are expanded at the time this
199 \item \mintinline{make}{FOO:=bar} -- simply expanded variables; the
200 value is assigned at the moment the variable is created
202 \item Variables can come from the environment and can be overridden on
203 the command line: \mintinline{shell}{FOO=blah make} or
204 \mintinline{shell}{make FOO=bleargh}.
205 \item \mintinline{make}{$@} -- target name %$
206 \item \mintinline{make}{$*} -- current stem %$
207 \item \mintinline{make}{$^} -- all prerequisites %$
208 \item \mintinline{make}{$<} -- first prerequisite %$
209 \item \mintinline{make}{$(FOO)} -- how variables are referenced %$
213 \subsection{Functions}
215 \begin{frame}[fragile]{Some Functions}
217 \item \mintinline{make}{$(patsubst %.sam,%.bam,foo.sam bar.sam)} %$
218 -- returns foo.bam bar.bam.
219 \item \mintinline{make}{$(filter-out %.bam,foo.sam bar.bam)} %$
221 \item \mintinline{make}{$(words foo.sam bar.bam)} %$
222 -- returns the number of words in its argument (2)
223 \item \mintinline{make}{$(wordlist 1,2,foo.sam bar.bam bleargh.foo)} %$
224 -- returns the words in its last argument starting with the first
225 and ending with the second.
231 \subsubsection{Default Target}
233 \begin{frame}[fragile]{How does make know what to build?}
234 \begin{minted}[showtabs]{make}
237 second_target: first_target
241 \item By default, make builds the first target.
242 \item You can specify a specific target to build on the command line
243 (\mintinline{shell}{make first_target}).
244 \item You can change the default target by using the variable
245 \mintinline{make}{.DEFAULT_GOAL := second_target}
250 \subsubsection{Special Targets}
252 \begin{frame}[fragile]{Special Targets}
253 \begin{minted}[showtabs]{make}
257 rm -f first_target second_target
260 \item \mintinline{make}{.PHONY} -- any time make considers this
261 target, it is run unconditionally, even if a file exists.
262 \item \mintinline{make}{.ONESHELL} -- when a target is built, all
263 lines will be given to a single invocation of the shell.
264 \item Lots of other special targets which are not described here.
269 \subsubsection{Pattern Rules}
272 \begin{frame}[fragile]{Pattern Rules}
273 \begin{minted}[showtabs]{make}
278 samtools view -b -o $@ $<
281 \item \% is the pattern stem which is accessible by
282 \mintinline{make}{$*} %$ within rules
283 \item The first rule uncompresses fasta files
284 \item The second rule turns sam files into bam files
290 \subsection{This Presentation}
292 \begin{frame}[fragile]{How this presentation is made}
293 \inputminted[showtabs,breaklines,firstline=3]{make}{Makefile}
295 \begin{frame}[fragile]{How this presentation is made}
297 \item all is the default
298 \item Download the optional relevant\_xkcd.png
299 \item Make .tex files from the knitr source.
300 \item The third rule uses latexmk to build the pdf using \XeLaTeX.
304 \subsection{Calling records from SRA}
306 \begin{frame}{Calling records from SRA: The problem}
308 \item ≈200 tissue samples from Roadmap Epigenomics
309 \item No consistent workflow
310 \item Reanalyze them all using STAR and cufflinks
314 \begin{frame}[fragile]{Calling records from SRA: Downloading}
315 \begin{minted}[showtabs,breaklines]{make}
317 SRRS=SRR020291 SRR020290
319 SRR_FILES=$(patsubst %,%.sra,$(SRRS))
321 get_srr: $(SRR_FILES)
324 rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$(shell echo -n $*|sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@;
328 \item First three lines are actually generated by other code and
330 \item Download all of the SRA files using rsync
334 \begin{frame}[fragile]{Calling records from SRA: Dumping fastq}
335 \begin{minted}[showtabs,breaklines]{make}
337 FASTQ_FILES:=$(patsubst %,%.fastq.gz,$(SRRS))
339 FASTQ_FILES:=$(patsubst %,%_1.fastq.gz,$(SRRS)) $(patsubst %,%_2.fastq.gz,$(SRRS))
342 make_fastq: $(FASTQ_FILES)
346 \item Use ifeq/else/endif to handle paired reads differently from
348 \item FASTQ\_FILES is the full set of fastq files dumped from the SRAs.
352 \begin{frame}[fragile]{Calling records from SRA: Dumping fastq \#2}
353 \begin{minted}[showtabs,breaklines]{make}
355 $(FASTQ_FILES): %.fastq.gz: %.sra
357 %_1.fastq.gz %_2.fastq.gz: %.sra
359 $(MODULE) load sratoolkit/2.3.5-2; \
360 fastq-dump --split-3 --gzip $^;
364 \item Handles NREADS of 1 and 2 differently
365 \item Call fastq-dump to dump the fastq files
369 \begin{frame}[fragile]{Calling records from SRA: Align with STAR}
370 \begin{minted}[showtabs,breaklines]{make}
372 $(MODULE) load STAR/2.4.2a; \
373 mkdir -p $(SRX)_star; \
374 STAR --outFileNamePrefix $(SRX)_star/ \
375 --outSAMtype BAM SortedByCoordinate \
376 --runThreadN $(CORES) \
377 --outSAMstrandField intronMotif \
378 --genomeDir $(STAR_INDEX_DIR) \
379 --readFilesCommand "gzip -dc" \
380 --readFilesIn $(TOPHAT_FASTQ_ARGUMENT);
381 ln $(SRX)_star/Aligned.*.bam $@ -s
385 \item Call STAR with lots of options to do the alignment
389 \begin{frame}[fragile]{Calling records from SRA: Call with cufflinks}
390 \begin{minted}[showtabs,breaklines]{make}
391 call: $(SRX)_genes.fpkm_tracking
393 $(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF)
394 $(MODULE) load cufflinks/2.2.1; \
395 cufflinks -p $(CORES) -G $(wordlist 2,2,$^) $<
396 for file in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \
397 mv $${file} $(SRX)_$${file}; \
402 \item Use cufflinks to call
406 \begin{frame}[fragile]{Run it on biocluster}
407 \begin{minted}{shell}
408 ~donarm/uiuc_igb_scripts/dqsub --mem 70G \
412 \item dqsub is my own qsub wrapper, which saves me from having to write
413 little scripts for everything
414 \item \url{http://git.donarmstrong.com/?p=uiuc_igb_scripts.git;a=blob;f=dqsub}
418 \section{Why not make?}
420 \begin{frame}{Why not make?}
422 \item Timestamps, not MD5sums
423 \item Complicated workflows
424 \item Interaction of rules can be complicated to understand
425 \item Yet Another Language
429 \subsection{Timestamps}
431 \begin{frame}[fragile]{Dealing with timestamps}
432 \begin{minted}[showtabs,breaklines]{make}
433 TARGET: PREREQ1 PREREQ2
434 if [ -e $@.tgt.md5sum ] && [ -e $@ ] \
435 && md5sum --status --check \
436 $@.tgt.md5sum; then \
440 md5sum $^ > $@.tgt.md5sum; \
445 \item Make builds things on the basis of timestamps
446 \item But what if the contents haven't changed and it's expensive to
452 \subsection{Complicated Workflows}
454 \begin{frame}[fragile]{What about complicated workflows?}
456 \item If your workflow is really complicated, what then?
457 \item Use some other language to write your workflow in
458 \item Use a simple makefile which just runs the workflow
460 \begin{minted}[showtabs,breaklines]{make}
461 complicated_workflow_done: req1 req2 req3
462 ./complicated_workflow.sh $^;
467 \section{Further Resources}
469 \begin{frame}{Further Resources}
471 \item GNU Make Manual: \url{https://www.gnu.org/software/make/manual/}
472 \item Mailing lists: \url{http://www.gnu.org/software/make/}
473 \item Stack Overflow: \url{http://stackoverflow.com/questions/tagged/make}
474 \item Myself: \href{mailto:don@donarmstrong.com}{don@donarmstrong.com}
475 \item This presentation: \url{http://git.donarmstrong.com/using\_make\_for\_science.git}