X-Git-Url: https://git.donarmstrong.com/?p=using_make_for_science.git;a=blobdiff_plain;f=using_make_for_science.Rnw;h=9fd424a3eb6ce1d2514644c519653b8815cfd3fa;hp=29df3764fdd518e8ea610cde87bc363646b76fc9;hb=2b141c12e69485e71e36649c8128a28deddcf373;hpb=efca486c9cd9f5f602ccd59864442f2e93341b9a diff --git a/using_make_for_science.Rnw b/using_make_for_science.Rnw index 29df376..9fd424a 100644 --- a/using_make_for_science.Rnw +++ b/using_make_for_science.Rnw @@ -89,6 +89,8 @@ \subject{make for science} \begin{document} +\IfFileExists{./relevant_xkcd.png}{\frame[plain]{\centering \includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./relevant_xkcd.png}}}{} + \frame[plain]{\titlepage} \mode<article>
{\maketitle} @@ -133,12 +135,31 @@ \item Gradle (Rake DSL) \item Leiningen (Clojure) \item Tweaker (task definitions in any language) + \item Ruffus (Pipeline library for Python) \item \href{https://en.wikipedia.org/wiki/List_of_build_automation_software}{Wikipedia List of build automation software} \end{itemize} \end{frame} +\subsection{Why use GNU make?} +\begin{frame}{Why use GNU make?} + \begin{itemize} + \item Ubiquitous -- any machine which you can run command line tools + on has GNU make available. + \item Large community -- lots of people use GNU make. It's well + understood and you can get questions answered + \item Simple rules -- all of the rules are in a simple text file + which is easily edited and version controlled + \item Reasonable debugging -- you can see the commands that make is + going to run fairly easily: \mintinline{shell}{make -n target;} + \item Parallel -- make can make targets in parallel: + \mintinline{shell}{make -j8 all;} + \item Language agnostic -- make doesn't care what language your code + is written in + \end{itemize} +\end{frame} + \section{Introduction to Makefiles} \begin{frame}[fragile]{Simple Makefile} @@ -269,9 +290,12 @@ clean: \subsection{This Presentation} \begin{frame}[fragile]{How this presentation is made} -\inputminted[showtabs]{make}{Makefile} +\inputminted[showtabs,breaklines,firstline=3]{make}{Makefile} +\end{frame} +\begin{frame}[fragile]{How this presentation is made} \begin{itemize} - \item all is the default target + \item all is the default target + \item Download the optional relevant\_xkcd.png \item Make .tex files from the knitr source. \item The third rule uses latexmk to build the pdf using \XeLaTeX. 
\end{itemize} @@ -290,7 +314,7 @@ clean: \begin{frame}[fragile]{Calling records from SRA: Downloading} \begin{minted}[showtabs,breaklines]{make} SRX=SRX007165 -SRRS=SRR020291 SRR020290 SRR020288 SRR020287 SRR020289 +SRRS=SRR020291 SRR020290 NREADS=1 SRR_FILES=$(patsubst %,%.sra,$(SRRS)) @@ -298,7 +322,8 @@ get_srr: $(SRR_FILES) $(SRR_FILES): %.sra: rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$(shell echo -n $*|sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@; -\end{minted} %$ +\end{minted} +%$ \begin{itemize} \item First three lines are actually generated by other code and included @@ -315,7 +340,17 @@ FASTQ_FILES:=$(patsubst %,%_1.fastq.gz,$(SRRS)) $(patsubst %,%_2.fastq.gz,$(SRR endif make_fastq: $(FASTQ_FILES) +\end{minted} +%$ +\begin{itemize} +\item Use ifeq/else/endif to handle paired reads differently from + unpaired reads +\item FASTQ\_FILES is the full set of fastq files dumped from the SRAs. +\end{itemize} +\end{frame} +\begin{frame}[fragile]{Calling records from SRA: Dumping fastq #2} +\begin{minted}[showtabs,breaklines]{make} ifeq ($(NREADS),1) $(FASTQ_FILES): %.fastq.gz: %.sra else @@ -323,10 +358,11 @@ else endif $(MODULE) load sratoolkit/2.3.5-2; \ fastq-dump --split-3 --gzip $^; -\end{minted} %$ +\end{minted} +%$ \begin{itemize} - \item Call fastq-dump to dump the fastq files \item Handles NREADS of 1 and 2 differently + \item Call fastq-dump to dump the fastq files \end{itemize} \end{frame} @@ -343,7 +379,8 @@ $(SRX)_star.bam: --readFilesCommand "gzip -dc" \ --readFilesIn $(TOPHAT_FASTQ_ARGUMENT); ln $(SRX)_star/Aligned.*.bam $@ -s -\end{minted} %$ +\end{minted} +%$ \begin{itemize} \item Call STAR with lots of options to do the alignment \end{itemize} @@ -359,7 +396,8 @@ $(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF) for file in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \ mv $${file} $(SRX)_$${file}; \ done; -\end{minted} %$ +\end{minted} +%$ 
\begin{itemize} \item Use cufflinks to call \end{itemize} @@ -388,35 +426,52 @@ $(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF) \subsection{Timestamps} -\begin{frame}{Dealing with timestamps} - \begin{itemize} - \item Make builds things on the basis of timestamps - \item But what if the contents haven't changed and it's expensive to - rebuild? - \item Use md5sum! - \end{itemize} -\begin{minted}[showtabs,breaklines]{make} +\begin{frame}[fragile]{Dealing with timestamps} + \begin{minted}[showtabs,breaklines]{make} TARGET: PREREQ1 PREREQ1 - if [ -e $@.target.md5sum ] && [ -e $@ ] && \ - md5sum --status --check $@.target.md5sum; \ - then \ + if [ -e $@.tgt.md5sum ] && [ -e $@ ] \ + && md5sum --status --check \ + $@.tgt.md5sum; then \ touch $@; \ else \ - RECIPE FOR $@; \ - md5sum $^ > $@.target.md5sum; \ + RECIPE FOR $@; \ + md5sum $^ > $@.tgt.md5sum; \ fi; \end{minted} +% $ +\begin{itemize} +\item Make builds things on the basis of timestamps +\item But what if the contents haven't changed and it's expensive to + rebuild? +\item Use md5sum! +\end{itemize} \end{frame} \subsection{Complicated Workflows} -\begin{frame}{Complicated Workflow} - +\begin{frame}[fragile]{What about complicated workflows?} + \begin{itemize} + \item If your workflow is really complicated, what then? 
+ \item Use some other language to write your workflow in + \item Use a simple makefile which just runs the workflow + \end{itemize} + \begin{minted}[showtabs,breaklines]{make} +complicated_workflow_done: req1 req2 req3 + ./complicated_workflow.sh $^; + touch $@; +\end{minted} \end{frame} \section{Further Resources} \begin{frame}{Further Resources} + \begin{itemize} + \item GNU Make Manual: \url{https://www.gnu.org/software/make/manual/} + \item Mailing lists: \url{http://www.gnu.org/software/make/} + \item Stack Overflow: \url{http://stackoverflow.com/questions/tagged/make} + \item Myself: \href{mailto:don@donarmstrong.com}{don@donarmstrong.com} + \item This presentation: \url{http://git.donarmstrong.com/using\_make\_for\_science.git} + \end{itemize} \end{frame} \end{document}