1 \documentclass[ignorenonframetext]{beamer}
2 \usepackage[Symbols,MiscellaneousSymbols]{ucharclasses}
5 \setmainfont{FreeSerif}
12 \usepackage{threeparttable}
13 \usepackage[backend=biber,natbib=true,hyperref=true,style=numeric-comp]{biblatex}
14 \bibliography{references}
15 \usepackage[nomargin,inline,draft]{fixme}
19 \usepackage{zref-xr,zref-user}
21 \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
23 \usepackage[x11names,svgnames,usenames,dvipsnames]{xcolor}
24 \usepackage[noxcolor]{beamerarticle}
27 \usepackage[bf]{caption}
32 \usepackage{adjustbox}
33 \usepackage{longtable}
35 \usepackage{pdflscape}
36 \usepackage[hyperfigures,bookmarks,colorlinks]{hyperref}
46 \usepackage{tcolorbox}
48 \BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
49 \AfterEndEnvironment{minted}{\end{tcolorbox}}%
52 \usetheme{CambridgeUS}
53 % http://identitystandards.illinois.edu/graphicstandardsmanual/generalguidelines/colors.html
54 \definecolor{ilboldblue}{HTML}{002058}
55 \definecolor{ilboldorange}{HTML}{E87722}
56 \definecolor{ilblue}{HTML}{606EB2}
57 \definecolor{ilorange}{HTML}{D45D00}
58 \setbeamercolor{alerted text}{fg=ilboldblue}
59 \setbeamercolor*{palette primary}{fg=ilblue,bg=ilorange}
60 \setbeamercolor*{palette secondary}{fg=ilblue!20!white,bg=ilorange}
61 \setbeamercolor*{palette tertiary}{bg=ilblue,fg=ilorange}
62 \setbeamercolor*{palette quaternary}{fg=ilblue,bg=ilorange}
64 \setbeamercolor*{sidebar}{fg=ilorange,bg=ilboldblue}
66 \setbeamercolor*{palette sidebar primary}{fg=ilblue!10!white,bg=ilorange}
67 \setbeamercolor*{palette sidebar secondary}{fg=ilorange}
68 \setbeamercolor*{palette sidebar tertiary}{fg=ilblue}
69 \setbeamercolor*{palette sidebar quaternary}{fg=ilorange}
71 % \setbeamercolor*{titlelike}{parent=palette primary}
72 \setbeamercolor{titlelike}{parent=palette primary,fg=ilboldblue,bg=ilorange}
73 \setbeamercolor{frametitle}{fg=ilboldorange,bg=ilblue!80!white}
74 \setbeamercolor{frametitle right}{fg=ilboldblue,bg=ilorange}
76 \setbeamercolor*{separation line}{}
77 \setbeamercolor*{fine separation line}{}
78 \setbeamercovered{transparent}
79 \logo{\begin{tikzpicture}% Pale figure
80 {\node[opacity=0.7]{\IfFileExists{./logo.pdf}{\includegraphics[height=1.5cm]{logo.pdf}}{}%
85 \title{Using make for science}
87 \author{Don Armstrong}
89 \subject{make for science}
92 \frame[plain]{\titlepage}
94 \mode<article>{\maketitle}
96 \section{What make was made for}
97 \begin{frame}{What was make originally made to do?}
99 \item Compiling and installing software from source
100 \item Replacement of operating system specific compilation and
101 installation shell scripts
102 \item Re-compile when dependencies of the software were modified
106 \subsection{Brief history of makes}
108 \begin{frame}{Brief history of make-alikes}
111 \href{http://pubs.opengroup.org/onlinepubs/009695399/utilities/make.html}{POSIX
112 Make} (standardization of basic features of make)
113 \item \href{http://www.gnu.org/software/make/manual/}{GNU Make}
114 (standard make on Linux and OS X)
115 \item \href{https://www.freebsd.org/cgi/man.cgi?query=make(1)}{BSD
116 Make} (pmake or bmake)
118 \href{https://msdn.microsoft.com/en-us/library/dd9y37ha.aspx}{nmake}
119 (part of Visual Studio)
120 \item \href{http://plan9.bell-labs.com/sys/doc/mk.html}{Mk} (Plan 9
125 \subsection{Other solutions in this problem space}
127 \begin{frame}{Other non-make dependency builders}
129 \item Ant (popular for Java software)
130 \item Cabal (popular for Haskell)
131 \item Maven (also Java)
132 \item Rake (Ruby build tool)
133 \item Gradle (Groovy DSL)
134 \item Leiningen (Clojure)
135 \item Tweaker (task definitions in any language)
137 \href{https://en.wikipedia.org/wiki/List_of_build_automation_software}{Wikipedia
138 List of build automation software}
142 \section{Introduction to Makefiles}
144 \begin{frame}[fragile]{Simple Makefile}
145 \begin{minted}[showtabs]{make}
147 echo "hello world" > hello_world
152 \subsection{General Syntax}
154 \begin{frame}[fragile]{General Syntax}
155 \begin{minted}[showtabs]{make}
156 TARGETS: PREREQUISITES
160 \item TARGETS are file names separated by spaces
161 \item PREREQUISITES are file names separated by spaces.
162 \item RECIPE lines start with a tab, are executed by the shell and
163 describe how to make the TARGETS (generally from the PREREQUISITES)
164 \item A TARGET is out of date if it does not exist or if it is older
165 than any of the prerequisites.
169 \subsection{Variables}
171 \begin{frame}[fragile]{Some Variables}
173 \item Two flavors of variables
175 \item \mintinline{make}{FOO=bar} -- recursively expanded variables;
176 references to other variables are expanded at the time this
178 \item \mintinline{make}{FOO:=bar} -- simply expanded variables; the
179 value is assigned at the moment the variable is created
181 \item Variables can come from the environment and can be overridden on
182 the command line: \mintinline{shell}{make FOO=bleargh} or
183 \mintinline{shell}{FOO=blah make}.
184 \item \mintinline{make}{$@} -- target name %$
185 \item \mintinline{make}{$*} -- current stem %$
186 \item \mintinline{make}{$^} -- all prerequisites %$
187 \item \mintinline{make}{$<} -- first prerequisite %$
188 \item \mintinline{make}{$(FOO)} -- how variables are referenced %$
192 \subsection{Functions}
194 \begin{frame}[fragile]{Some Functions}
196 \item \mintinline{make}{$(patsubst %.sam,%.bam,foo.sam bar.sam)} %$
197 -- returns foo.bam bar.bam.
198 \item \mintinline{make}{$(filter-out %.bam,foo.sam bar.bam)} %$
200 \item \mintinline{make}{$(words foo.sam bar.bam)} %$
201 -- returns the number of words in its argument (2)
202 \item \mintinline{make}{$(wordlist 1,2,foo.sam bar.bam bleargh.foo)} %$
203 -- returns the words in its last argument starting with the first
204 and ending with the second.
210 \subsubsection{Default Target}
212 \begin{frame}[fragile]{How does make know what to build?}
213 \begin{minted}[showtabs]{make}
216 second_target: first_target
220 \item By default, make builds the first target.
221 \item You can specify a specific target to build on the command line
222 (\mintinline{shell}{make first_target}).
223 \item You can change the default target by using the variable
224 \mintinline{make}{.DEFAULT_GOAL := second_target}
229 \subsubsection{Special Targets}
231 \begin{frame}[fragile]{Special Targets}
232 \begin{minted}[showtabs]{make}
236 rm -f first_target second_target
239 \item \mintinline{make}{.PHONY} -- any time make considers this
240 target, it is run unconditionally, even if a file with that name exists.
241 \item \mintinline{make}{.ONESHELL} -- when a target is built, all
242 lines will be given to a single invocation of the shell.
243 \item Lots of other special targets which are not described here.
248 \subsubsection{Pattern Rules}
251 \begin{frame}[fragile]{Pattern Rules}
252 \begin{minted}[showtabs]{make}
257 samtools view -b -o $@ $<
260 \item \% is the pattern stem which is accessible by
261 \mintinline{make}{$*} %$ within rules
262 \item The first rule uncompresses fasta files
263 \item The second rule turns sam files into bam files
269 \subsection{This Presentation}
271 \begin{frame}[fragile]{How this presentation is made}
272 \inputminted[showtabs]{make}{Makefile}
274 \item all is the default target
275 \item Make .tex files from the knitr source.
276 \item The third rule uses latexmk to build the PDF using \XeLaTeX.
280 \subsection{Calling records from SRA}
282 \begin{frame}{Calling records from SRA: The problem}
284 \item ≈200 tissue samples from Roadmap Epigenomics
285 \item No consistent workflow
286 \item Reanalyze them all using STAR and cufflinks
290 \begin{frame}[fragile]{Calling records from SRA: Downloading}
291 \begin{minted}[showtabs,breaklines]{make}
293 SRRS=SRR020291 SRR020290 SRR020288 SRR020287 SRR020289
295 SRR_FILES=$(patsubst %,%.sra,$(SRRS))
297 get_srr: $(SRR_FILES)
300 rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$(shell echo -n $*|sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@;
303 \item First three lines are actually generated by other code and
305 \item Download all of the SRA files using rsync
309 \begin{frame}[fragile]{Calling records from SRA: Dumping fastq}
310 \begin{minted}[showtabs,breaklines]{make}
312 FASTQ_FILES:=$(patsubst %,%.fastq.gz,$(SRRS))
314 FASTQ_FILES:=$(patsubst %,%_1.fastq.gz,$(SRRS)) $(patsubst %,%_2.fastq.gz,$(SRRS))
317 make_fastq: $(FASTQ_FILES)
320 $(FASTQ_FILES): %.fastq.gz: %.sra
322 %_1.fastq.gz %_2.fastq.gz: %.sra
324 $(MODULE) load sratoolkit/2.3.5-2; \
325 fastq-dump --split-3 --gzip $^;
328 \item Call fastq-dump to dump the fastq files
329 \item Handles NREADS of 1 and 2 differently
333 \begin{frame}[fragile]{Calling records from SRA: Align with STAR}
334 \begin{minted}[showtabs,breaklines]{make}
336 $(MODULE) load STAR/2.4.2a; \
337 mkdir -p $(SRX)_star; \
338 STAR --outFileNamePrefix $(SRX)_star/ \
339 --outSAMtype BAM SortedByCoordinate \
340 --runThreadN $(CORES) \
341 --outSAMstrandField intronMotif \
342 --genomeDir $(STAR_INDEX_DIR) \
343 --readFilesCommand "gzip -dc" \
344 --readFilesIn $(TOPHAT_FASTQ_ARGUMENT);
345 ln $(SRX)_star/Aligned.*.bam $@ -s
348 \item Call STAR with lots of options to do the alignment
352 \begin{frame}[fragile]{Calling records from SRA: Call with cufflinks}
353 \begin{minted}[showtabs,breaklines]{make}
354 call: $(SRX)_genes.fpkm_tracking
356 $(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF)
357 $(MODULE) load cufflinks/2.2.1; \
358 cufflinks -p $(CORES) -G $(wordlist 2,2,$^) $<
359 for file in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \
360 mv $${file} $(SRX)_$${file}; \
364 \item Use cufflinks to call
368 \begin{frame}[fragile]{Run it on biocluster}
369 \begin{minted}{shell}
370 for a in SRX*/Makefile; do
372 MAKE_TARGET=call qsub -q default \
373 -v MAKE_TARGET -S /bin/bash \
374 -d "$(pwd)" -l "nodes=1:ppn=8,mem=70G" \
375 ~donarm/uiuc_igb_scripts/run_make
380 \item Use qsub and some shell scripting to run everything
381 \item Uses a special script which calls make with a specific target
385 \section{Why not make?}
387 \begin{frame}{Why not make?}
389 \item Timestamps, not MD5sums
390 \item Complicated workflows
394 \subsection{Timestamps}
396 \begin{frame}{Dealing with timestamps}
399 \subsection{Complicated Workflows}
401 \begin{frame}{Complicated Workflow}
405 \section{Further Resources}
407 \begin{frame}{Further Resources}