+\begin{frame}{Calling records from SRA: The problem}
+ \begin{itemize}
+ \item $\approx$200 tissue samples from Roadmap Epigenomics
+ \item No consistent workflow
+ \item Reanalyze them all using STAR and cufflinks
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Downloading}
+\begin{minted}[showtabs,breaklines]{make}
+SRX=SRX007165
+SRRS=SRR020291 SRR020290
+NREADS=1
+# Archive file name (.sra) for each run accession in SRRS
+SRR_FILES=$(patsubst %,%.sra,$(SRRS))
+
+# Fetch every archive
+get_srr: $(SRR_FILES)
+
+# The stem is the run accession; sed keeps its leading SRR plus
+# three digits, the directory level above the accession in the URL.
+# The shell function strips the trailing newline, so plain echo
+# is enough (echo -n is not portable across sh implementations).
+$(SRR_FILES): %.sra:
+	rsync -avP "rsync://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/$(shell echo $*|sed 's/\(SRR[0-9][0-9][0-9]\).*/\1/')/$*/$*.sra" $@;
+\end{minted}
+%$
+ \begin{itemize}
+ \item The first three lines (SRX, SRRS, NREADS) are actually generated
+ by other code and included
+ \item Download all of the SRA files using rsync
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Dumping fastq}
+\begin{minted}[showtabs,breaklines]{make}
+# NREADS=1: one fastq per run. Any other value: a _1/_2 pair per run.
+ifeq ($(NREADS),1)
+FASTQ_FILES:=$(addsuffix .fastq.gz,$(SRRS))
+else
+FASTQ_FILES:=$(addsuffix _1.fastq.gz,$(SRRS)) $(addsuffix _2.fastq.gz,$(SRRS))
+endif
+
+# Dump every fastq file listed above
+make_fastq: $(FASTQ_FILES)
+\end{minted}
+%$
+\begin{itemize}
+\item Use ifeq/else/endif to handle paired reads differently from
+ unpaired reads
+\item FASTQ\_FILES is the full set of fastq files dumped from the SRAs.
+\end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Dumping fastq \#2}
+\begin{minted}[showtabs,breaklines]{make}
+# Only the rule header depends on NREADS; the recipe is shared.
+ifneq ($(NREADS),1)
+# Paired layout: both mate files come from the same .sra
+%_1.fastq.gz %_2.fastq.gz: %.sra
+else
+# NREADS=1: each fastq comes from the .sra with the same stem
+$(FASTQ_FILES): %.fastq.gz: %.sra
+endif
+	$(MODULE) load sratoolkit/2.3.5-2; \
+	fastq-dump --split-3 --gzip $<;
+\end{minted}
+%$
+ \begin{itemize}
+ \item Handles NREADS of 1 and 2 differently
+ \item Call fastq-dump to dump the fastq files
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Align with STAR}
+\begin{minted}[showtabs,breaklines]{make}
+# Align with STAR into $(SRX)_star/, then link the sorted BAM to
+# the target name. No prerequisites are listed, so make does not
+# build the reads named by TOPHAT_FASTQ_ARGUMENT for this rule.
+# ln: options precede operands (POSIX); -f replaces a stale link.
+$(SRX)_star.bam:
+	$(MODULE) load STAR/2.4.2a; \
+	mkdir -p $(SRX)_star; \
+	STAR --outFileNamePrefix $(SRX)_star/ \
+		--outSAMtype BAM SortedByCoordinate \
+		--runThreadN $(CORES) \
+		--outSAMstrandField intronMotif \
+		--genomeDir $(STAR_INDEX_DIR) \
+		--readFilesCommand "gzip -dc" \
+		--readFilesIn $(TOPHAT_FASTQ_ARGUMENT);
+	ln -sf $(SRX)_star/Aligned.*.bam $@
+\end{minted}
+%$
+ \begin{itemize}
+ \item Call STAR with lots of options to do the alignment
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Calling records from SRA: Call with cufflinks}
+\begin{minted}[showtabs,breaklines]{make}
+# Entry point: build the gene-level FPKM table for this SRX
+call: $(SRX)_genes.fpkm_tracking
+
+# First prerequisite is the BAM; the second is the GTF given to -G.
+# The loop then renames the four listed files in the working
+# directory so that each carries the $(SRX)_ prefix.
+$(SRX)_genes.fpkm_tracking: $(SRX)_star.bam $(BOWTIE_INDEX_DIR)$(GTF)
+	$(MODULE) load cufflinks/2.2.1; \
+	cufflinks -p $(CORES) -G $(word 2,$^) $<
+	for out in genes.fpkm_tracking isoforms.fpkm_tracking skipped.gtf transcripts.gtf; do \
+		mv $${out} $(SRX)_$${out}; \
+	done;
+\end{minted}
+%$
+ \begin{itemize}
+ \item Use cufflinks to call gene and isoform expression (FPKM), then
+ prefix its output files with the SRX accession
+ \end{itemize}
+\end{frame}
+
+\begin{frame}[fragile]{Run it on biocluster}
+ \begin{minted}{shell}
+# Submit `make call` through the dqsub wrapper
+~donarm/uiuc_igb_scripts/dqsub \
+    --mem 70G --ppn 8 \
+    make call
+\end{minted}
+\begin{itemize}
+\item dqsub is my own qsub wrapper, which saves me from writing
+ little scripts for everything
+\item \url{http://git.donarmstrong.com/?p=uiuc_igb_scripts.git;a=blob;f=dqsub}
+\end{itemize}
+\end{frame}
+