From 7bef02dcc059cc1de1390d9232da4e3868dda207 Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Tue, 4 Aug 2015 15:33:35 -0700 Subject: [PATCH] add more examples of makefiles --- using_make_for_science.Rnw | 213 ++++++++++++++++++++++++++++++++++++- 1 file changed, 208 insertions(+), 5 deletions(-) diff --git a/using_make_for_science.Rnw b/using_make_for_science.Rnw index 7903454..f418d49 100644 --- a/using_make_for_science.Rnw +++ b/using_make_for_science.Rnw @@ -17,6 +17,7 @@ \usepackage{tikz} \usepackage{nameref} \usepackage{zref-xr,zref-user} +\usepackage{metalogo} \IfFileExists{upquote.sty}{\usepackage{upquote}}{} \mode
{ \usepackage[x11names,svgnames,usenames,dvipsnames]{xcolor} @@ -42,6 +43,10 @@ } \usepackage{minted} +\usepackage{tcolorbox} +\usepackage{etoolbox} +\BeforeBeginEnvironment{minted}{\begin{tcolorbox}}% +\AfterEndEnvironment{minted}{\end{tcolorbox}}% \mode{ \usetheme{CambridgeUS} @@ -141,39 +146,237 @@ hello_world: echo "hello world" > hello_world \end{minted} + \end{frame} \subsection{General Syntax} - \begin{frame}[fragile]{Simple Makefile} \begin{minted}[showtabs]{make} -hello_world: - echo "hello world" > hello_world +TARGETS: PREREQUISITES + RECIPE \end{minted} +\begin{itemize} +\item TARGETS are file names separated by spaces +\item PREREQUISITES are file names separated by spaces. +\item RECIPE lines start with a tab, are executed by the shell and + describe how to make the TARGETS (generally from the PREREQUISITES) +\item A TARGET is out of date if it does not exist or if it is older + than any of the prerequisites. +\end{itemize} \end{frame} \subsection{Variables} +\begin{frame}[fragile]{Some Variables} +\begin{itemize} +\item Two flavors of variables + \begin{itemize} + \item \mintinline{make}{FOO=bar} -- recursively expanded variables; + references to other variables are expanded at the time this + variable is expanded + \item \mintinline{make}{FOO:=bar} -- simply expanded variables; the + value is assigned at the moment the variable is created + \end{itemize} +\item Variables can come from the environment and can be overridden on + the command line: \mintinline{shell}{make FOO=bleargh} or + \mintinline{shell}{FOO=blah make}. 
+\item \mintinline{make}{$@} -- target name %$ +\item \mintinline{make}{$*} -- current stem %$ +\item \mintinline{make}{$^} -- all prerequisites %$ +\item \mintinline{make}{$<} -- first prerequisite %$ +\item \mintinline{make}{$(FOO)} -- how variables are referenced %$ +\end{itemize} +\end{frame} + +\subsection{Functions} + +\begin{frame}[fragile]{Some Functions} + \begin{itemize} + \item \mintinline{make}{$(patsubst %.sam,%.bam,foo.sam bar.sam)} %$ + -- returns foo.bam bar.bam. + \item \mintinline{make}{$(filter-out %.bam,foo.sam bar.bam)} %$ + -- returns foo.sam + \item \mintinline{make}{$(words foo.sam bar.bam)} %$ + -- returns the number of words in its argument (2) + \item \mintinline{make}{$(wordlist 1,2,foo.sam bar.bam bleargh.foo)} %$ + -- returns the words in its last argument starting with the 1st + and ending with the 2nd (foo.sam bar.bam). + \end{itemize} +\end{frame} + \subsection{Rules} \subsubsection{Default Target} +\begin{frame}[fragile]{How does make know what to build?} +\begin{minted}[showtabs]{make} +first_target: + touch $@ +second_target: first_target + touch $@ +\end{minted} + \begin{itemize} + \item By default, make builds the first target. + \item You can specify a specific target to build on the command line + (\mintinline{shell}{make first_target}). + \item You can change the default target by using the variable + \mintinline{make}{.DEFAULT_GOAL := second_target} + \end{itemize} +\end{frame} + + \subsubsection{Special Targets} +\begin{frame}[fragile]{Special Targets} +\begin{minted}[showtabs]{make} +.PHONY: clean + +clean: + rm -f first_target second_target +\end{minted} + \begin{itemize} + \item \mintinline{make}{.PHONY} -- any time make considers this + target, its recipe is run unconditionally, even if a file with that name exists. + \item \mintinline{make}{.ONESHELL} -- when a target is built, all + lines will be given to a single invocation of the shell. + \item Lots of other special targets which are not described here. 
+ \end{itemize} +\end{frame} + + \subsubsection{Pattern Rules} -\subsection{Functions} +\begin{frame}[fragile]{Pattern Rules} +\begin{minted}[showtabs]{make} +%.fasta: %.fasta.gz + gzip -dc $< > $@ + +%.bam: %.sam + samtools view -b -o $@ $< +\end{minted} + \begin{itemize} + \item \% is the pattern stem which is accessible by + \mintinline{make}{$*} within rules %$ + \item The first rule uncompresses fasta files + \item The second rule turns sam files into bam files + \end{itemize} +\end{frame} \section{Examples} \subsection{This Presentation} -\subsection{Can you dig it?} +\begin{frame}[fragile]{How this presentation is made} +\inputminted[showtabs]{make}{Makefile} + \begin{itemize} + \item all is the default target + \item Make .tex files from the knitr source. + \item The third rule uses latexmk to build the pdf using \XeLaTeX. + \end{itemize} +\end{frame} \subsection{Calling records from SRA} +\begin{frame}{Calling records from SRA: The problem} + \begin{itemize} + \item ≈200 tissue samples from Roadmap Epigenomics + \item No consistent workflow + \item Reanalyze them all using STAR and cufflinks + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Calling records from SRA: Downloading} +\begin{minted}[showtabs,breaklines]{make} +SRX=SRX007165 +SRRS=SRR020291 SRR020290 SRR020288 SRR020287 SRR020289 +NREADS=1 +SRR_FILES=$(patsubst %,%.sra,$(SRRS)) + +get_srr: $(SRR_FILES) + +$(SRR_FILES): %.sra: + rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$(shell echo -n $*|sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@; +\end{minted} %$ + \begin{itemize} + \item First three lines are actually generated by other code and + included + \item Download all of the SRA files using rsync + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Calling records from SRA: Dumping fastq} +\begin{minted}[showtabs,breaklines]{make} +ifeq ($(NREADS),1) +FASTQ_FILES:=$(patsubst 
%,%_1.fastq.gz,$(SRRS)) $(patsubst %,%_2.fastq.gz,$(SRRS)) +endif + +make_fastq: $(FASTQ_FILES) + +ifeq ($(NREADS),1) +$(FASTQ_FILES): %.fastq.gz: %.sra +else +%_1.fastq.gz %_2.fastq.gz: %.sra +endif + $(MODULE) load sratoolkit/2.3.5-2; \ + fastq-dump --split-3 --gzip $^; +\end{minted} %$ + \begin{itemize} + \item Call fastq-dump to dump the fastq files + \item Handles NREADS of 1 and 2 differently + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Calling records from SRA: Align with STAR} +\begin{minted}[showtabs,breaklines]{make} +$(SRX)_star.bam: + $(MODULE) load STAR/2.4.2a; \ + mkdir -p $(SRX)_star; \ + STAR --outFileNamePrefix $(SRX)_star/ \ + --outSAMtype BAM SortedByCoordinate \ + --runThreadN $(CORES) \ + --outSAMstrandField intronMotif \ + --genomeDir $(STAR_INDEX_DIR) \ + --readFilesCommand "gzip -dc" \ + --readFilesIn $(TOPHAT_FASTQ_ARGUMENT); + ln $(SRX)_star/Aligned.sortedByCoord.out.bam $@ -s +\end{minted} %$ + \begin{itemize} + \item Call STAR with lots of options to do the alignment + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Calling records from SRA: Call with cufflinks} +\begin{minted}[showtabs,breaklines]{make} +call: $(SRX)_genes.fpkm_tracking + +$(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF) + $(MODULE) load cufflinks/2.2.1; \ + cufflinks -p $(CORES) -G $(wordlist 2,2,$^) $< + for file in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \ + mv $${file} $(SRX)_$${file}; \ + done; +\end{minted} %$ + \begin{itemize} + \item Use cufflinks to call + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Run it on biocluster} + \begin{minted}{shell} +for a in SRX*/Makefile; do + (cd $(dirname $a); + MAKE_TARGET=call qsub -q default -v MAKE_TARGET -S /bin/bash \ + -d "$(pwd)" -l "nodes=1:ppn=8,mem=12G" \ + ~donarm/uiuc_igb_scripts/run_make + ); +done; +\end{minted} %$ +\end{frame} + \section{Why not make?} \subsection{Timestamps} -- 2.39.2