1 \documentclass[ignorenonframetext]{beamer}
2 \usepackage[Symbols,MiscellaneousSymbols]{ucharclasses}
5 \setmainfont{FreeSerif}
12 \usepackage{threeparttable}
13 \usepackage[backend=biber,natbib=true,hyperref=true,style=numeric-comp]{biblatex}
14 \bibliography{references}
15 \usepackage[nomargin,inline,draft]{fixme}
19 \usepackage{zref-xr,zref-user}
21 \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
23 \usepackage[x11names,svgnames,usenames,dvipsnames]{xcolor}
24 \usepackage[noxcolor]{beamerarticle}
27 \usepackage[bf]{caption}
32 \usepackage{adjustbox}
33 \usepackage{longtable}
35 \usepackage{pdflscape}
36 \usepackage[hyperfigures,bookmarks,colorlinks]{hyperref}
46 \usepackage{tcolorbox}
48 \BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
49 \AfterEndEnvironment{minted}{\end{tcolorbox}}%
52 \usetheme{CambridgeUS}
53 % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
54 \definecolor{ilboldblue}{HTML}{002058}
55 \definecolor{ilboldorange}{HTML}{E87722}
56 \definecolor{ilblue}{HTML}{606EB2}
57 \definecolor{ilorange}{HTML}{D45D00}
58 \setbeamercolor{alerted text}{fg=ilboldblue}
59 \setbeamercolor*{palette primary}{fg=ilblue,bg=ilorange}
60 \setbeamercolor*{palette secondary}{fg=ilblue!20!white,bg=ilorange}
61 \setbeamercolor*{palette tertiary}{bg=ilblue,fg=ilorange}
62 \setbeamercolor*{palette quaternary}{fg=ilblue,bg=ilorange}
64 \setbeamercolor*{sidebar}{fg=ilorange,bg=ilboldblue}
66 \setbeamercolor*{palette sidebar primary}{fg=ilblue!10!white,bg=ilorange}
67 \setbeamercolor*{palette sidebar secondary}{fg=ilorange}
68 \setbeamercolor*{palette sidebar tertiary}{fg=ilblue}
69 \setbeamercolor*{palette sidebar quaternary}{fg=ilorange}
71 % \setbeamercolor*{titlelike}{parent=palette primary}
72 \setbeamercolor{titlelike}{parent=palette primary,fg=ilboldblue,bg=ilorange}
73 \setbeamercolor{frametitle}{fg=ilboldorange,bg=ilblue!80!white}
74 \setbeamercolor{frametitle right}{fg=ilboldblue,bg=ilorange}
76 \setbeamercolor*{separation line}{}
77 \setbeamercolor*{fine separation line}{}
78 \setbeamercovered{transparent}
79 \logo{\begin{tikzpicture}% Pale figure
80 {\node[opacity=0.7]{\IfFileExists{./logo.pdf}{\includegraphics[height=1.5cm]{logo.pdf}}{}%
85 \title{Using make for science}
87 \author{Don Armstrong}
89 \subject{make for science}
92 \frame[plain]{\titlepage}
94 \mode<article>{\maketitle}
96 \section{What make was made for}
97 \begin{frame}{What was make originally made to do?}
99 \item Compiling and installing software from source
100 \item Replacement of operating system specific compilation and
101 installation shell scripts
102 \item Re-compile when dependencies of the software are modified
106 \subsection{Brief history of makes}
108 \begin{frame}{Brief history of make-alikes}
111 \href{http://pubs.opengroup.org/onlinepubs/009695399/utilities/make.html}{POSIX
112 Make} (standardization of basic features of make)
113 \item \href{http://www.gnu.org/software/make/manual/}{GNU Make}
114 (standard make on Linux and OS X)
115 \item \href{https://www.freebsd.org/cgi/man.cgi?query=make(1)}{BSD
116 Make} (pmake or bmake)
118 \href{https://msdn.microsoft.com/en-us/library/dd9y37ha.aspx}{nmake}
119 (part of Visual Studio)
120 \item \href{http://plan9.bell-labs.com/sys/doc/mk.html}{Mk} (Plan 9
125 \subsection{Other solutions in this problem space}
127 \begin{frame}{Other non-make dependency builders}
129 \item Ant (popular for Java software)
130 \item Cabal (popular for Haskell)
131 \item Maven (also Java)
132 \item Rake (Ruby build tool)
133 \item Gradle (Groovy DSL)
134 \item Leiningen (Clojure)
135 \item Tweaker (task definitions in any language)
136 \item Ruffus (pipeline library for Python)
138 \href{https://en.wikipedia.org/wiki/List_of_build_automation_software}{Wikipedia
139 List of build automation software}
143 \subsection{Why use GNU make?}
144 \begin{frame}{Why use GNU make?}
146 \item Ubiquitous -- any machine which you can run command line tools
147 on has GNU make available.
148 \item Large community -- lots of people use GNU make. It's well
149 understood and you can get questions answered
150 \item Simple rules -- all of the rules are in a simple text file
151 which is easily edited and version controlled
152 \item Reasonable debugging -- you can see the commands that make is
153 going to run fairly easily: \mintinline{shell}{make -n tgt;}
154 \item Parallel -- make can make targets in parallel:
155 \mintinline{shell}{make -j8 all;}
156 \item Language agnostic -- make doesn't care what language your code
161 \section{Introduction to Makefiles}
163 \begin{frame}[fragile]{Simple Makefile}
164 \begin{minted}[showtabs]{make}
166 echo "hello world" > hello_world
171 \subsection{General Syntax}
173 \begin{frame}[fragile]{General Syntax}
174 \begin{minted}[showtabs]{make}
175 TARGETS: PREREQUISITES
179 \item TARGETS are file names separated by spaces
180 \item PREREQUISITES are file names separated by spaces.
181 \item RECIPE lines start with a tab, are executed by the shell and
182 describe how to make the TARGETS (generally from the PREREQUISITES)
183 \item A TARGET is out of date if it does not exist or if it is older
184 than any of the prerequisites.
188 \subsection{Variables}
190 \begin{frame}[fragile]{Some Variables}
192 \item Two flavors of variables
194 \item \mintinline{make}{FOO=bar} -- recursively expanded variables;
195 references to other variables are expanded at the time this
197 \item \mintinline{make}{FOO:=bar} -- simply expanded variables; the
198 value is assigned at the moment the variable is created
200 \item Variables can come from the environment and can be overridden on
201 the command line: \mintinline{shell}{make FOO=bleargh} or
202 \mintinline{shell}{FOO=blah make}.
203 \item \mintinline{make}{$@} -- target name %$
204 \item \mintinline{make}{$*} -- current stem %$
205 \item \mintinline{make}{$^} -- all prerequisites %$
206 \item \mintinline{make}{$<} -- first prerequisite %$
207 \item \mintinline{make}{$(FOO)} -- how variables are referenced %$
211 \subsection{Functions}
213 \begin{frame}[fragile]{Some Functions}
215 \item \mintinline{make}{$(patsubst %.sam,%.bam,foo.sam bar.sam)} %$
216 -- returns foo.bam bar.bam.
217 \item \mintinline{make}{$(filter-out %.bam,foo.sam bar.bam)} %$
219 \item \mintinline{make}{$(words foo.sam bar.bam)} %$
220 -- returns the number of words in its argument (2)
221 \item \mintinline{make}{$(wordlist 1,2,foo.sam bar.bam bleargh.foo)} %$
222 -- returns the words in its last argument starting with the first
223 and ending with the second (foo.sam bar.bam).
229 \subsubsection{Default Target}
231 \begin{frame}[fragile]{How does make know what to build?}
232 \begin{minted}[showtabs]{make}
235 second_tgt: first_tgt
239 \item By default, make builds the first target.
240 \item You can specify a specific target to build on the command line
241 (\mintinline{shell}{make first_tgt}).
242 \item You can change the default target by using the variable
243 \mintinline{make}{.DEFAULT_GOAL := second_tgt}
248 \subsubsection{Special Targets}
250 \begin{frame}[fragile]{Special Targets}
251 \begin{minted}[showtabs]{make}
255 rm -f first_tgt second_tgt
258 \item \mintinline{make}{.PHONY} -- any time make considers this
259 target, it is run unconditionally, even if a file exists.
260 \item \mintinline{make}{.ONESHELL} -- when a target is built, all
261 lines will be given to a single invocation of the shell.
262 \item Lots of other special targets which are not described here.
267 \subsubsection{Pattern Rules}
270 \begin{frame}[fragile]{Pattern Rules}
271 \begin{minted}[showtabs]{make}
276 samtools view -b -o $@ $<
279 \item \% is the pattern stem which is accessible by
280 \mintinline{make}{$*} %$ within rules
281 \item The first rule uncompresses fasta files
282 \item The second rule turns sam files into bam files
288 \subsection{This Presentation}
290 \begin{frame}[fragile]{How this presentation is made}
291 \inputminted[showtabs]{make}{Makefile}
293 \item all is the default target
294 \item Make .tex files from the knitr source.
295 \item The third rule uses latexmk to build the pdf using \XeLaTeX.
299 \subsection{Calling records from SRA}
301 \begin{frame}{Calling records from SRA: The problem}
303 \item ≈200 tissue samples from Roadmap Epigenomics
304 \item No consistent workflow
305 \item Reanalyze them all using STAR and cufflinks
309 \begin{frame}[fragile]{Calling records from SRA: Downloading}
310 \begin{minted}[showtabs,breaklines]{make}
312 SRRS=SRR020291 SRR020290
314 SRR_FILES=$(patsubst %,%.sra,$(SRRS))
316 get_srr: $(SRR_FILES)
319 rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$(shell echo -n $*|sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@;
323 \item First three lines are actually generated by other code and
325 \item Download all of the SRA files using rsync
329 \begin{frame}[fragile]{Calling records from SRA: Dumping fastq}
330 \begin{minted}[showtabs,breaklines]{make}
332 FASTQ_FILES:=$(patsubst %,%.fastq.gz,$(SRRS))
334 FASTQ_FILES:=$(patsubst %,%_1.fastq.gz,$(SRRS)) $(patsubst %,%_2.fastq.gz,$(SRRS))
337 make_fastq: $(FASTQ_FILES)
340 $(FASTQ_FILES): %.fastq.gz: %.sra
342 %_1.fastq.gz %_2.fastq.gz: %.sra
344 $(MODULE) load sratoolkit/2.3.5-2; \
345 fastq-dump --split-3 --gzip $^;
349 \item Call fastq-dump to dump the fastq files
350 \item Handles NREADS of 1 and 2 differently
354 \begin{frame}[fragile]{Calling records from SRA: Align with STAR}
355 \begin{minted}[showtabs,breaklines]{make}
357 $(MODULE) load STAR/2.4.2a; \
358 mkdir -p $(SRX)_star; \
359 STAR --outFileNamePrefix $(SRX)_star/ \
360 --outSAMtype BAM SortedByCoordinate \
361 --runThreadN $(CORES) \
362 --outSAMstrandField intronMotif \
363 --genomeDir $(STAR_INDEX_DIR) \
364 --readFilesCommand "gzip -dc" \
365 --readFilesIn $(TOPHAT_FASTQ_ARGUMENT);
366 ln $(SRX)_star/Aligned.*.bam $@ -s
370 \item Call STAR with lots of options to do the alignment
374 \begin{frame}[fragile]{Calling records from SRA: Call with cufflinks}
375 \begin{minted}[showtabs,breaklines]{make}
376 call: $(SRX)_genes.fpkm_tracking
378 $(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF)
379 $(MODULE) load cufflinks/2.2.1; \
380 cufflinks -p $(CORES) -G $(wordlist 2,2,$^) $<
381 for file in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \
382 mv $${file} $(SRX)_$${file}; \
387 \item Use cufflinks to call
391 \begin{frame}[fragile]{Run it on biocluster}
392 \begin{minted}{shell}
393 ~donarm/uiuc_igb_scripts/dqsub --mem 70G \
397 \item dqsub is my own qsub wrapper which avoids me having to write
398 little scripts for everything
399 \item \url{http://git.donarmstrong.com/?p=uiuc_igb_scripts.git;a=blob;f=dqsub}
403 \section{Why not make?}
405 \begin{frame}{Why not make?}
407 \item Timestamps, not MD5sums
408 \item Complicated workflows
412 \subsection{Timestamps}
414 \begin{frame}[fragile]{Dealing with timestamps}
415 \begin{minted}[showtabs,breaklines]{make}
416 TARGET: PREREQ1 PREREQ2
417 if [ -e $@.tgt.md5sum ] && [ -e $@ ] \
418 && md5sum --status --check \
419 $@.tgt.md5sum; then \
423 md5sum $^ > $@.tgt.md5sum; \
428 \item Make builds things on the basis of timestamps
429 \item But what if the contents haven't changed and it's expensive to
435 \subsection{Complicated Workflows}
437 \begin{frame}{What about complicated workflows?}
439 \item If your workflow is really complicated, what then?
440 \item Use some other language to write your workflow in
441 \item Use a simple makefile which just runs the workflow
445 \section{Further Resources}
447 \begin{frame}{Further Resources}
449 \item GNU Make Manual: \url{https://www.gnu.org/software/make/manual/}
450 \item Mailing lists: \url{http://www.gnu.org/software/make/}
451 \item Stack Overflow: \url{http://stackoverflow.com/questions/tagged/make}
452 \item Myself: \href{mailto:don@donarmstrong.com}{don@donarmstrong.com}
453 \item This presentation: \url{http://git.donarmstrong.com/using\_make\_for\_science.git}