From 965452df6fbe769c2733543b8880db096a1fb850 Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Wed, 5 Aug 2015 11:48:15 -0700 Subject: [PATCH] more changes to makefile presentation --- using_make_for_science.Rnw | 99 ++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 32 deletions(-) diff --git a/using_make_for_science.Rnw b/using_make_for_science.Rnw index 29df376..8ff99d1 100644 --- a/using_make_for_science.Rnw +++ b/using_make_for_science.Rnw @@ -133,12 +133,31 @@ \item Gradle (Rake DSL) \item Leiningen (Clojure) \item Tweaker (task definitions in any language) + \item Ruffus (Pipeline library for Python) \item \href{https://en.wikipedia.org/wiki/List_of_build_automation_software}{Wikipedia List of build automation software} \end{itemize} \end{frame} +\subsection{Why use GNU make?} +\begin{frame}{Why use GNU make?} + \begin{itemize} + \item Ubiquitous -- any machine on which you can run command-line + tools has GNU make available. + \item Large community -- lots of people use GNU make. It's well + understood and you can get questions answered + \item Simple rules -- all of the rules are in a simple text file + which is easily edited and version controlled + \item Reasonable debugging -- you can see the commands that make is + going to run fairly easily: \mintinline{shell}{make -n tgt;} + \item Parallel -- make can build targets in parallel: + \mintinline{shell}{make -j8 all;} + \item Language agnostic -- make doesn't care what language your code + is written in + \end{itemize} +\end{frame} + \section{Introduction to Makefiles} \begin{frame}[fragile]{Simple Makefile} @@ -181,7 +200,7 @@ TARGETS: PREREQUISITES \item Variables can come from the environment and can be overridden on the command line: \mintinline{shell}{make FOO=bleargh} or \mintinline{shell}{FOO=blah make}.
-\item \mintinline{make}{$@} -- target name %$ +\item \mintinline{make}{$@} -- target name %$ \item \mintinline{make}{$*} -- current stem %$ \item \mintinline{make}{$^} -- all prerequisites %$ \item \mintinline{make}{$<} -- first prerequisite %$ @@ -211,17 +230,17 @@ TARGETS: PREREQUISITES \begin{frame}[fragile]{How does make know what to build?} \begin{minted}[showtabs]{make} -first_target: +first_tgt: touch $@ -second_target: first_target +second_tgt: first_tgt touch $@ \end{minted} \begin{itemize} - \item By default, make builds the first target. - \item You can specify a specific target to build on the command line - (\mintinline{shell}{make first_target}). - \item You can change the default target by using the variable - \mintinline{make}{.DEFAULT_GOAL := second_target} + \item By default, make builds the first target. + \item You can name a specific target to build on the command line + (\mintinline{shell}{make first_tgt}). + \item You can change the default target by using the variable + \mintinline{make}{.DEFAULT_GOAL := second_tgt} \end{itemize} \end{frame} @@ -233,12 +252,12 @@ second_target: first_target .PHONY: clean clean: - rm -f first_target second_target + rm -f first_tgt second_tgt \end{minted} \begin{itemize} \item \mintinline{make}{.PHONY} -- any time make considers this - target, it is run unconditionally, even if a file exists. - \item \mintinline{make}{.ONESHELL} -- when a target is built, all + target, it is run unconditionally, even if a file exists. + \item \mintinline{make}{.ONESHELL} -- when a target is built, all lines will be given to a single invocation of the shell. \item Lots of other special targets which are not described here. \end{itemize} @@ -271,7 +290,7 @@ clean: \begin{frame}[fragile]{How this presentation is made} \inputminted[showtabs]{make}{Makefile} \begin{itemize} - \item all is the default target + \item all is the default target \item Make .tex files from the knitr source.
\item The third rule uses latexmk to build the pdf using \XeLaTeX. \end{itemize} @@ -290,7 +309,7 @@ clean: \begin{frame}[fragile]{Calling records from SRA: Downloading} \begin{minted}[showtabs,breaklines]{make} SRX=SRX007165 -SRRS=SRR020291 SRR020290 SRR020288 SRR020287 SRR020289 +SRRS=SRR020291 SRR020290 NREADS=1 SRR_FILES=$(patsubst %,%.sra,$(SRRS)) @@ -298,7 +317,8 @@ get_srr: $(SRR_FILES) $(SRR_FILES): %.sra: rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$(shell echo -n $*|sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@; -\end{minted} %$ +\end{minted} +%$ \begin{itemize} \item First three lines are actually generated by other code and included @@ -323,7 +343,8 @@ else endif $(MODULE) load sratoolkit/2.3.5-2; \ fastq-dump --split-3 --gzip $^; -\end{minted} %$ +\end{minted} +%$ \begin{itemize} \item Call fastq-dump to dump the fastq files \item Handles NREADS of 1 and 2 differently @@ -343,7 +364,8 @@ $(SRX)_star.bam: --readFilesCommand "gzip -dc" \ --readFilesIn $(TOPHAT_FASTQ_ARGUMENT); ln $(SRX)_star/Aligned.*.bam $@ -s -\end{minted} %$ +\end{minted} +%$ \begin{itemize} \item Call STAR with lots of options to do the alignment \end{itemize} @@ -359,7 +381,8 @@ $(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF) for file in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \ mv $${file} $(SRX)_$${file}; \ done; -\end{minted} %$ +\end{minted} +%$ \begin{itemize} \item Use cufflinks to call \end{itemize} @@ -388,35 +411,47 @@ $(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF) \subsection{Timestamps} -\begin{frame}{Dealing with timestamps} - \begin{itemize} - \item Make builds things on the basis of timestamps - \item But what if the contents haven't changed and it's expensive to - rebuild? - \item Use md5sum! 
- \end{itemize} -\begin{minted}[showtabs,breaklines]{make} +\begin{frame}[fragile]{Dealing with timestamps} + \begin{minted}[showtabs,breaklines]{make} TARGET: PREREQ1 PREREQ1 - if [ -e $@.target.md5sum ] && [ -e $@ ] && \ - md5sum --status --check $@.target.md5sum; \ - then \ + if [ -e $@.tgt.md5sum ] && [ -e $@ ] \ + && md5sum --status --check \ + $@.tgt.md5sum; then \ touch $@; \ else \ - RECIPE FOR $@; \ - md5sum $^ > $@.target.md5sum; \ + RECIPE FOR $@; \ + md5sum $^ > $@.tgt.md5sum; \ fi; \end{minted} +% $ +\begin{itemize} +\item Make builds things on the basis of timestamps +\item But what if the contents haven't changed and it's expensive to + rebuild? +\item Use md5sum! +\end{itemize} \end{frame} \subsection{Complicated Workflows} -\begin{frame}{Complicated Workflow} - +\begin{frame}{What about complicated workflows?} + \begin{itemize} + \item If your workflow is really complicated, what then? + \item Use some other language to write your workflow in + \item Use a simple makefile which just runs the workflow + \end{itemize} \end{frame} \section{Further Resources} \begin{frame}{Further Resources} + \begin{itemize} + \item GNU Make Manual: \url{https://www.gnu.org/software/make/manual/} + \item Mailing lists: \url{http://www.gnu.org/software/make/} + \item Stack Overflow: \url{http://stackoverflow.com/questions/tagged/make} + \item Myself: \href{mailto:don@donarmstrong.com}{don@donarmstrong.com} + \item This presentation: \url{http://git.donarmstrong.com/using\_make\_for\_science.git} + \end{itemize} \end{frame} \end{document} -- 2.39.2