+\begin{frame}{Calling records from SRA: The problem}
+ \begin{itemize}
+ \item ≈200 tissue samples from Roadmap Epigenomics
+ \item No consistent workflow
+ \item Reanalyze them all using STAR and cufflinks
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Downloading}
+\begin{minted}[showtabs,breaklines]{make}
+SRX=SRX007165
+SRRS=SRR020291 SRR020290 SRR020288 SRR020287 SRR020289
+NREADS=1
+# One .sra archive per run accession listed in SRRS
+SRR_FILES:=$(addsuffix .sra,$(SRRS))
+
+# get_srr names a task, not a file
+.PHONY: get_srr
+get_srr: $(SRR_FILES)
+
+# The remote directory is the accession cut down to "SRR" plus three
+# digits; the shell works it out when the recipe runs
+$(SRR_FILES): %.sra:
+	rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$$(printf '%s' '$*' | sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@
+\end{minted} %$
+ \begin{itemize}
+ \item First three lines are actually generated by other code and
+ included
+ \item Download all of the SRA files using rsync
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Dumping fastq}
+\begin{minted}[showtabs,breaklines]{make}
+# NREADS=1: one FASTQ per run; otherwise a _1/_2 pair per run
+ifeq ($(NREADS),1)
+FASTQ_FILES:=$(addsuffix .fastq.gz,$(SRRS))
+else
+FASTQ_FILES:=$(addsuffix _1.fastq.gz,$(SRRS)) $(addsuffix _2.fastq.gz,$(SRRS))
+endif
+
+# make_fastq names a task, not a file
+.PHONY: make_fastq
+make_fastq: $(FASTQ_FILES)
+
+# Only the rule header depends on NREADS; the recipe below is shared.
+# && stops before fastq-dump if the module cannot be loaded
+ifeq ($(NREADS),1)
+$(FASTQ_FILES): %.fastq.gz: %.sra
+else
+%_1.fastq.gz %_2.fastq.gz: %.sra
+endif
+	$(MODULE) load sratoolkit/2.3.5-2 && \
+	fastq-dump --split-3 --gzip $<
+\end{minted} %$
+ \begin{itemize}
+ \item Call fastq-dump to dump the fastq files
+ \item Handles NREADS of 1 and 2 differently
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Align with STAR}
+\begin{minted}[showtabs,breaklines]{make}
+# Run STAR into $(SRX)_star/, then symlink its sorted BAM to the target.
+# FASTQ_FILES is presumably what TOPHAT_FASTQ_ARGUMENT lists -- confirm
+$(SRX)_star.bam: $(FASTQ_FILES)
+	$(MODULE) load STAR/2.4.2a && \
+	mkdir -p $(SRX)_star && \
+	STAR --outFileNamePrefix $(SRX)_star/ \
+		--outSAMtype BAM SortedByCoordinate \
+		--runThreadN $(CORES) \
+		--outSAMstrandField intronMotif \
+		--genomeDir $(STAR_INDEX_DIR) \
+		--readFilesCommand "gzip -dc" \
+		--readFilesIn $(TOPHAT_FASTQ_ARGUMENT)
+	ln -sf $(SRX)_star/Aligned.sortedByCoord.out.bam $@
+\end{minted} %$
+ \begin{itemize}
+ \item Call STAR with lots of options to do the alignment
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Call with cufflinks}
+\begin{minted}[showtabs,breaklines]{make}
+# call names a task, not a file
+.PHONY: call
+call: $(SRX)_genes.fpkm_tracking
+
+# $< is the BAM and $(word 2,$^) the GTF; the loop then prefixes the
+# four output files found in the working directory with the SRX
+$(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF)
+	$(MODULE) load cufflinks/2.2.1 && \
+	cufflinks -p $(CORES) -G $(word 2,$^) $<
+	for file in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \
+		mv $${file} $(SRX)_$${file}; \
+	done
+\end{minted} %$
+ \begin{itemize}
+ \item Use cufflinks to quantify expression (FPKM) against the reference GTF
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Run it on biocluster}
+ \begin{minted}[breaklines]{shell}
+# Queue run_make with MAKE_TARGET=call from inside each SRX*/ directory;
+# && skips the submission if the cd fails
+for a in SRX*/Makefile; do
+    (cd "$(dirname "$a")" &&
+     MAKE_TARGET=call qsub -q default -v MAKE_TARGET -S /bin/bash \
+         -d "$(pwd)" -l "nodes=1:ppn=8,mem=12G" \
+         ~donarm/uiuc_igb_scripts/run_make
+    )
+done
+\end{minted} %$
+\end{frame}
+