\usepackage{tikz}
\usepackage{nameref}
\usepackage{zref-xr,zref-user}
+\usepackage{metalogo}
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\mode<article>{
\usepackage[x11names,svgnames,usenames,dvipsnames]{xcolor}
}
\usepackage{minted}
+\usepackage{tcolorbox}
+\usepackage{etoolbox}
+\BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
+\AfterEndEnvironment{minted}{\end{tcolorbox}}%
\mode<presentation>{
\usetheme{CambridgeUS}
hello_world:
echo "hello world" > hello_world
\end{minted}
+
\end{frame}
\subsection{General Syntax}
-
\begin{frame}[fragile]{Simple Makefile}
\begin{minted}[showtabs]{make}
-hello_world:
- echo "hello world" > hello_world
+TARGETS: PREREQUISITES
+ RECIPE
\end{minted}
+\begin{itemize}
+\item TARGETS are file names separated by spaces
+\item PREREQUISITES are file names separated by spaces.
+\item RECIPE lines start with a tab, are executed by the shell and
+ describe how to make the TARGETS (generally from the PREREQUISITES)
+\item A TARGET is out of date if it does not exist or if it is older
+ than any of the prerequisites.
+\end{itemize}
\end{frame}
\subsection{Variables}
+\begin{frame}[fragile]{Some Variables}
+\begin{itemize}
+\item Two flavors of variables
+ \begin{itemize}
+ \item \mintinline{make}{FOO=bar} -- recursively expanded variables;
+ references to other variables are expanded at the time this
+ variable is expanded
+ \item \mintinline{make}{FOO:=bar} -- simply expanded variables; the
+ value is assigned at the moment the variable is created
+ \end{itemize}
+\item Variables can come from the environment and can be overridden on
+ the command line: \mintinline{shell}{make FOO=bleargh} or
+ \mintinline{shell}{FOO=blah make}.
+\item \mintinline{make}{$@} -- target name %$
+\item \mintinline{make}{$*} -- current stem %$
+\item \mintinline{make}{$^} -- all prerequisites %$
+\item \mintinline{make}{$<} -- first prerequisite %$
+\item \mintinline{make}{$(FOO)} -- how variables are referenced %$
+\end{itemize}
+\end{frame}
+
+\subsection{Functions}
+
+\begin{frame}[fragile]{Some Functions}
+ \begin{itemize}
+ \item \mintinline{make}{$(patsubst %.sam,%.bam,foo.sam bar.sam)} %$
+ -- returns foo.bam bar.bam.
+ \item \mintinline{make}{$(filter-out %.bam,foo.sam bar.bam)} %$
+ -- returns foo.sam
+ \item \mintinline{make}{$(words foo.sam bar.bam)} %$
+ -- returns the number of words in its argument (2)
+ \item \mintinline{make}{$(wordlist 1,2,foo.sam bar.bam bleargh.foo)} %$
+ -- returns the words in its last argument starting with the 1st
+ and ending with the 2nd (foo.sam bar.bam).
+ \end{itemize}
+\end{frame}
+
\subsection{Rules}
\subsubsection{Default Target}
+\begin{frame}[fragile]{How does make know what to build?}
+\begin{minted}[showtabs]{make}
+first_target:
+ touch $@
+second_target: first_target
+ touch $@
+\end{minted}
+ \begin{itemize}
+ \item By default, make builds the first target.
+ \item You can specify a specific target to build on the command line
+ (\mintinline{shell}{make first_target}).
+ \item You can change the default target by using the variable
+ \mintinline{make}{.DEFAULT_GOAL := second_target}
+ \end{itemize}
+\end{frame}
+
+
\subsubsection{Special Targets}
+\begin{frame}[fragile]{Special Targets}
+\begin{minted}[showtabs]{make}
+.PHONY: clean
+
+clean:
+ rm -f first_target second_target
+\end{minted}
+ \begin{itemize}
+ \item \mintinline{make}{.PHONY} -- the prerequisites of this special
+ target are phony targets: any time make considers one of them, its
+ recipe is run unconditionally, even if a file with that name exists.
+ \item \mintinline{make}{.ONESHELL} -- when a target is built, all
+ lines will be given to a single invocation of the shell.
+ \item Lots of other special targets which are not described here.
+ \end{itemize}
+\end{frame}
+
+
\subsubsection{Pattern Rules}
-\subsection{Functions}
+\begin{frame}[fragile]{Pattern Rules}
+\begin{minted}[showtabs]{make}
+%.fasta: %.fasta.gz
+ gzip -dc $< > $@
+
+%.bam: %.sam
+ samtools view -b -o $@ $<
+\end{minted}
+ \begin{itemize}
+ \item \% is the pattern stem which is accessible by
+ \mintinline{make}{$*} %$ within rules
+ \item The first rule uncompresses fasta files
+ \item The second rule turns sam files into bam files
+ \end{itemize}
+\end{frame}
\section{Examples}
\subsection{This Presentation}
-\subsection{Can you dig it?}
+\begin{frame}[fragile]{How this presentation is made}
+\inputminted[showtabs]{make}{Makefile}
+ \begin{itemize}
+ \item all is the default target
+ \item Make .tex files from the knitr source.
+ \item The third rule uses latexmk to build the pdf using \XeLaTeX.
+ \end{itemize}
+\end{frame}
\subsection{Calling records from SRA}
+\begin{frame}{Calling records from SRA: The problem}
+ \begin{itemize}
+ \item ≈200 tissue samples from Roadmap Epigenomics
+ \item No consistent workflow
+ \item Reanalyze them all using STAR and cufflinks
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Downloading}
+\begin{minted}[showtabs,breaklines]{make}
+SRX=SRX007165
+SRRS=SRR020291 SRR020290 SRR020288 SRR020287 SRR020289
+NREADS=1
+SRR_FILES=$(patsubst %,%.sra,$(SRRS))
+
+get_srr: $(SRR_FILES)
+
+$(SRR_FILES): %.sra:
+ rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$(shell echo -n $*|sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@;
+\end{minted} %$
+ \begin{itemize}
+ \item First three lines are actually generated by other code and
+ included
+ \item Download all of the SRA files using rsync
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Dumping fastq}
+\begin{minted}[showtabs,breaklines]{make}
+ifeq ($(NREADS),1)
+FASTQ_FILES:=$(patsubst %,%.fastq.gz,$(SRRS))
+else
+FASTQ_FILES:=$(patsubst %,%_1.fastq.gz,$(SRRS)) $(patsubst %,%_2.fastq.gz,$(SRRS))
+endif
+
+make_fastq: $(FASTQ_FILES)
+
+ifeq ($(NREADS),1)
+$(FASTQ_FILES): %.fastq.gz: %.sra
+else
+%_1.fastq.gz %_2.fastq.gz: %.sra
+endif
+ $(MODULE) load sratoolkit/2.3.5-2; \
+ fastq-dump --split-3 --gzip $^;
+\end{minted} %$
+ \begin{itemize}
+ \item Call fastq-dump to dump the fastq files
+ \item Handles NREADS of 1 and 2 differently
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Align with STAR}
+\begin{minted}[showtabs,breaklines]{make}
+$(SRX)_star.bam:
+ $(MODULE) load STAR/2.4.2a; \
+ mkdir -p $(SRX)_star; \
+ STAR --outFileNamePrefix $(SRX)_star/ \
+ --outSAMtype BAM SortedByCoordinate \
+ --runThreadN $(CORES) \
+ --outSAMstrandField intronMotif \
+ --genomeDir $(STAR_INDEX_DIR) \
+ --readFilesCommand "gzip -dc" \
+ --readFilesIn $(TOPHAT_FASTQ_ARGUMENT);
+ ln $(SRX)_star/Aligned.sortedByCoord.out.bam $@ -s
+\end{minted} %$
+ \begin{itemize}
+ \item Call STAR with lots of options to do the alignment
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Call with cufflinks}
+\begin{minted}[showtabs,breaklines]{make}
+call: $(SRX)_genes.fpkm_tracking
+
+$(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF)
+ $(MODULE) load cufflinks/2.2.1; \
+ cufflinks -p $(CORES) -G $(wordlist 2,2,$^) $<
+ for file in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \
+ mv $${file} $(SRX)_$${file}; \
+ done;
+\end{minted} %$
+ \begin{itemize}
+ \item Use cufflinks to call
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Run it on biocluster}
+ \begin{minted}{shell}
+for a in SRX*/Makefile; do
+ (cd $(dirname $a);
+ MAKE_TARGET=call qsub -q default -v MAKE_TARGET -S /bin/bash \
+ -d "$(pwd)" -l "nodes=1:ppn=8,mem=12G" \
+ ~donarm/uiuc_igb_scripts/run_make
+ );
+done;
+\end{minted} %$
+\end{frame}
+
\section{Why not make?}
\subsection{Timestamps}