]> git.donarmstrong.com Git - debbugs-presentations.git/blobdiff - debbugs.Rnw
remove too many rc bugs joke
[debbugs-presentations.git] / debbugs.Rnw
index 1f233b095a124dbdaaa09cc5322ec533b3dd1c3e..6fd3082c8ecc7f7af7eb8bca75ab85806c3c2062 100644 (file)
@@ -1,3 +1,36 @@
+
+\usepackage{fontspec}
+\setmainfont{FreeSerif}
+\setsansfont{FreeSans}
+\setmonofont{FreeMono}
+\usepackage{url}
+\usepackage{fancyhdr}
+\usepackage{graphicx}
+\usepackage[bf]{caption}
+\usepackage{rotating}
+\usepackage{wrapfig}
+\usepackage{fancybox}
+\usepackage{booktabs}
+\usepackage{minted}
+\usepackage{tcolorbox}
+\usepackage{etoolbox}
+\BeforeBeginEnvironment{minted}{\begin{tcolorbox}}%
+\AfterEndEnvironment{minted}{\end{tcolorbox}}%
+% \usepackage{multirow}
+\usepackage{acronym}
+\usepackage{qrcode}
+\usepackage[backend=biber,natbib=true,hyperref=true,style=nature]{biblatex}
+\addbibresource{references.bib}
+% \usepackage[nomargin,inline,draft]{fixme}
+% \newcommand{\DLA}[1]{\textcolor{red}{\fxnote{DLA: #1}}}
+% \usepackage[hyperfigures,bookmarks,colorlinks,citecolor=black,filecolor=black,linkcolor=black,urlcolor=black]{hyperref}
+\usepackage{texshade}
+\usepackage{tikz}
+\usepackage{nameref}
+\usepackage{zref-xr,zref-user}
+\renewcommand*{\bibfont}{\tiny}
+\usepackage[absolute,overlay]{textpos}
+
 \mode<presentation>
 {
   \usetheme{Montpellier}
 %  \setbeamercovered{transparent}  
 }
 
-
-\usepackage[no-math]{fontspec}
-\setmainfont[ExternalLocation, 
-             Mapping=tex-text,
-             BoldFont=FreeSerifBold,
-             ItalicFont=FreeSerifItalic,
-             BoldItalicFont=FreeSerifBoldItalic]{FreeSerif}
-\setsansfont[ExternalLocation, 
-             Mapping=tex-text,
-             BoldFont=FreeSerifBold,
-             ItalicFont=FreeSerifItalic,
-             BoldItalicFont=FreeSerifBoldItalic,
-             Scale=MatchLowercase]{FreeSerif}
-\setmonofont{FreeMono}
-
-\usepackage{booktabs}
-\usepackage{multirow}
-\usepackage{setspace}
-\usepackage[backend=biber,natbib=true,hyperref=true,style=numeric-comp]{biblatex}
-\bibliography{references}
-% \usepackage[hyperfigures,bookmarks,colorlinks]{hyperref}
-
+\setbeamertemplate{navigation symbols}{}%remove navigation symbols
 
 \usepackage[nomargin,inline,draft]{fixme}
-%\usepackage[x11names,svgnames]{xcolor}
-\usepackage{texshade}
-\usepackage[absolute,overlay]{textpos}
-\usepackage{tikz}
-\usepackage{nameref}
-\usepackage{ulem}
-\usepackage{zref-xr,zref-user}
 \usepackage{listings}
 
-\newenvironment{narrow}[2]{%
-  \begin{list}{}{%
-      \setlength{\topsep}{0pt}%
-      \setlength{\leftmargin}{#1}%
-      \setlength{\rightmargin}{#2}%
-      \setlength{\listparindent}{\parindent}%
-      \setlength{\itemindent}{\parindent}%
-      \setlength{\parsep}{\parskip}}%
-  \item[]}{\end{list}}
-\def\newblock{\hskip}
-\newenvironment{paperquote}{%
-  \begin{quote}%
-     \it
-  }%
-  {\end{quote}}
-\renewcommand{\textfraction}{0.15}
-\renewcommand{\topfraction}{0.85}
-\renewcommand{\bottomfraction}{0.65}
-\renewcommand{\floatpagefraction}{0.60}
-%\renewcommand{\baselinestretch}{1.8}
-\newenvironment{enumerate*}%
-  {\begin{enumerate}%
-    \setlength{\itemsep}{0pt}%
-    \setlength{\parskip}{0pt}}%
-  {\end{enumerate}}
-\newenvironment{itemize*}%
-  {\begin{itemize}%
-    \setlength{\itemsep}{0pt}%
-    \setlength{\parskip}{0pt}}%
-  {\end{itemize}}
 
 \logo{\begin{tikzpicture}% Pale figure
     {\node[opacity=0.3] {\includegraphics[width=2cm,keepaspectratio]{figures/openlogo-crop.pdf}};}%
     \end{tikzpicture}}
 \author{Don Armstrong}
 \title{Debbugs}
-\subtitle{Database Ho!}
-%\date{August 11th, 2008}
-\titlegraphic{\includegraphics[height=0.3\textheight,keepaspectratio]{figures/openlogo-crop.pdf}}
+\subtitle{22 Years of Bugs}
+\date{August 10th, 2017}
+\titlegraphic{\includegraphics[height=0.2\textheight,keepaspectratio]{figures/openlogo-crop.pdf}}
 \subject{BTS}
 
 % State of the BTS: new features, changes and tips
 %    - release to experimental
 
 
-<<load.libraries,echo=FALSE,results="hide",error=FALSE,message=FALSE>>=
+<<load.libraries,echo=FALSE,results="hide",error=FALSE,message=FALSE,cache=FALSE>>=
 library(lattice)
 library(xtable)
 library(ggplot2)
+library("scales")
 library(reshape2)
-opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio")
+library("data.table")
+opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio",fig.width=6/1.2,fig.height=4/1.2)
 opts_chunk$set(cache=TRUE, autodep=TRUE)
-options(device = function(file, width = 8, height = 7, ...) {
+options(device = function(file, width = 6/1.2, height = 4/1.2, ...) {  # default graphics device; default size equals fig.width/fig.height set in opts_chunk above
   cairo_pdf(tempfile(), width = width, height = height, ...)  # the `file` argument is ignored: output always goes to a fresh temporary file
 })
 
 @ 
 
+<<scientific_format,echo=FALSE>>=
+scientific_10 <- function(x) {  # label helper passed as `labels =` to scale_y_continuous(): turns numbers into plotmath expressions
+  parse(text=gsub("e", " %*% 10^", scientific_format()(x)))  # format x in scientific notation (scales), rewrite "e" as " %*% 10^" (e.g. "1e+05" -> 1 %*% 10^+05), then parse to expressions
+}
+@ 
 
 \begin{document}
 
 %\setbeamercolor{frametitle}[bg=-red!90!green!10,fg=black]
 
 \frame[plain]{\titlepage
-  Source available at \url{http://git.donarmstrong.com/debbugs-presentations.git}
+  Code and slides are here: 
+  \qrcode[padding]{https://dla2.us/p/debbugs2017}
+  \url{https://dla2.us/p/debbugs2017}
+
 }
 
 % \begin{frame}{Debbugs}
@@ -137,8 +122,7 @@ options(device = function(file, width = 8, height = 7, ...) {
   \begin{columns}
     \column{0.5\textwidth}
     \begin{itemize}
-    \item Bug Statitics
-    \item Introduction to Debbugs
+    \item Bug Statistics
     \item New features
     \item Planned features
     \item Places you can help
@@ -163,44 +147,81 @@ options(device = function(file, width = 8, height = 7, ...) {
 
 \section{Bugs in Debian}
 \subsection{Bug Reporting Rate}
-\begin{frame}{How many bugs do we have?}
+\begin{frame}{Bugs from the beginning of time}
   \begin{center}
-<<bug.growth,fig=TRUE,echo=FALSE>>=
-bug.growth <- read.table("data/sorted_bug_growth_for_r_every_500.txt",stringsAsFactors=FALSE);
+<<bug.growth,fig=TRUE,echo=FALSE,cache.extra=file.info("data/sorted_bug_growth_for_r_every_500.txt")[,"mtime"]>>=
+bug.growth <- fread("data/sorted_bug_growth_for_r_every_500.txt")
 colnames(bug.growth) <- c("time","bugs")
-bug.growth <- bug.growth[pmax(bug.growth$bugs) <= as.numeric(bug.growth$bugs),]
-bug.growth$date <- 
-  as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(bug.growth$time))
-print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0)),],aes(x=date,y=bugs))+
+# Sort by time, drop rows with a missing bug number, then keep only rows whose
+# bug number is a running maximum (cummax) so the series never decreases.
+# The previous pmax(bugs) filter was a no-op (single-argument pmax returns its
+# input) and the while/diff loop re-scanned the table once per pass; a single
+bug.growth <- bug.growth[order(time),][!is.na(bugs),]  # cummax pass (next line) keeps exactly the same rows
+bug.growth <- bug.growth[bugs >= cummax(bugs),]
+bug.growth[,date:=
+                as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+
+                           as.numeric(bug.growth$time))]
+bug.growth[,`bugs per day`:=c(0,diff(bugs)/
+                                as.double(diff(date),"days"))
+           ]
+print(ggplot(bug.growth,#[date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0)),],
+             aes(x=date,y=bugs))+
       xlab("Time")+ylab("Bugs filed in Debian")+
-      geom_point()+stat_smooth(method="lm")+
-      ggtitle("Bug growth versus time"))
+      scale_y_continuous(labels = scientific_10)+
+      geom_point())
 @ 
 \end{center}
 \end{frame}
 
-\begin{frame}
+\begin{frame}[fragile]{Bug Growth Rate}
+<<bug_growth_rate,echo=FALSE>>=
+print(ggplot(bug.growth[date > as.POSIXct(ISOdatetime(2014,1,1,0,0,0)),],
+             aes(x=date,y=`bugs per day`))+
+      xlab("Time")+ylab("Bugs filed per day")
+      + scale_y_log10(breaks=c(60,100,200,400,800))
+      + geom_line()+stat_smooth(method="lm")
+      + geom_label(data=data.table(date=as.POSIXct(ISOdatetime(2015,04,25,0,0,0)),"bugs per day"=400),label="Jessie",color="red")
+      + geom_label(data=data.table(date=as.POSIXct(ISOdatetime(2017,06,17,0,0,0)),"bugs per day"=400),label="Stretch",color="purple")
+      )
+@ 
+\end{frame}
+
+\begin{frame}[fragile]{Is the bug filing rate decreasing?}
+\tiny
+<<bug_growth_rate_over_time>>=
+summary(lm(log(`bugs per day`)~date,
+           bug.growth[date > "2014-01-01 PST",]))
+@ 
+\normalsize
+Not significantly decreasing.
+\end{frame}
+
+\begin{frame}{My entries into Christian's game}
   \begin{columns}
     \column{0.5\textwidth}
   \begin{center}
-<<bugs.filed,fig=TRUE,echo=FALSE,warning=FALSE>>=
-print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2013,1,1,0,0,0)),],aes(x=date,y=bugs))+
+<<bugs.filed,fig=TRUE,echo=FALSE,warning=FALSE,fig.width=3,fig.height=3>>=
+print(ggplot(bug.growth[date > as.POSIXct(ISOdatetime(2015,1,1,0,0,0)),],
+             aes(x=date,y=bugs))+
       xlab("Time")+ylab("Bugs filed in Debian")+
-      geom_point()+stat_smooth(method="lm")+
-      ggtitle("Christian Perrier's Plot"))
+      scale_y_continuous(labels = scientific_10)+
+      geom_point()+stat_smooth(method="lm"))
 bugs.filed.per.day <-
   lm(bugs~date,bug.growth)$coeff[2]*3600*24
-temp.lm <- lm(date~bugs,bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2013,1,1,0,0,0)),])
-bug.760000 <- 
-    as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=760000,date=NA))))
-bug.800000 <- 
-    as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=800000,date=NA))))
+temp.lm <- lm(date~bugs,bug.growth[date > as.POSIXct(ISOdatetime(2015,1,1,0,0,0)),])
+bug.880000 <- 
+    as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=880000,date=NA))))
+bug.900000 <- 
+    as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=900000,date=NA))))
+bug.1000000 <- 
+    as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=1000000,date=NA))))
 @ 
 \end{center}
 \column{0.5\textwidth}
 Roughly \Sexpr{format(bugs.filed.per.day,digits=1)} bugs are filed per
-day. The 760000th bug will be filed at \Sexpr{bug.760000}, and the
-800000th bug will be filed at \Sexpr{bug.800000}.
+day. The 880000th bug will be filed at \Sexpr{bug.880000}, the
+900000th bug will be filed at \Sexpr{bug.900000}, and bug one million will be filed at
+\Sexpr{bug.1000000}.
 \end{columns}
 \end{frame}
 
@@ -208,68 +229,60 @@ day. The 760000th bug will be filed at \Sexpr{bug.760000}, and the
 \subsection{Bug Fixing Rate}
 \begin{frame}
   \begin{center}
-<<bugs.closed,fig=TRUE,echo=FALSE,width=7,height=5,warning=FALSE,error=FALSE>>=
-bug.closed.series <- read.table(file="data/bug_closed_time_series.txt")
+<<bugs.closed,fig=TRUE,echo=FALSE,width=7,height=5,warning=FALSE,error=FALSE,cache.extra=file.info("data/bug_closed_time_series.txt")[,"mtime"]>>=
+bug.closed.series <- fread(file="data/bug_closed_time_series.txt")
 colnames(bug.closed.series) <- c("archived.bugs","year.week")
-bug.closed.series$week <-
-    gsub("(\\d+)-(\\d+)","\\2",bug.closed.series$year.week)
-bug.closed.series$year <-
-    gsub("(\\d+)-(\\d+)","\\1",bug.closed.series$year.week)
-bug.closed.series$doy <- 
-    as.numeric(bug.closed.series$week)*7
-bug.closed.series$year.doy <- 
-    paste(sep="-",bug.closed.series$year,bug.closed.series$doy)
-bug.closed.series$date <-
-  as.POSIXct(strptime(bug.closed.series$year.doy,
-                      format="%Y-%j"))
+bug.closed.series[,week:=
+    gsub("(\\d+)-(\\d+)","\\2",year.week)]
+bug.closed.series[,year:=
+    gsub("(\\d+)-(\\d+)","\\1",year.week)]
+bug.closed.series[,doy:= as.numeric(week)*7]
+bug.closed.series[,year.doy:=
+                       paste(sep="-",year,doy)]
+bug.closed.series[,date:=
+                       as.POSIXct(strptime(year.doy,
+                                           format="%Y-%j"))]
 
 # bug.closed.ts <- ts(bug.closed.series[,1],start=1,frequency=7)
 
-print(ggplot(bug.closed.series[bug.closed.series$date > as.POSIXct(ISOdatetime(2008,1,1,0,0,0)),],
+print(ggplot(bug.closed.series[date > as.POSIXct(ISOdatetime(2008,1,1,0,0,0)),],
              aes(x=date,y=archived.bugs/7))+geom_line()+stat_smooth(method="lm")+
       ylab("Bugs archived Per Day")+xlab("Time"))
-bugs.closed.per.day <- 
-    sum(bug.closed.series$archived.bugs)/
-    as.numeric(bug.closed.series$date[nrow(bug.closed.series)]-bug.closed.series$date[1])
+bugs.closed.per.day <-
+    bug.closed.series[,sum(archived.bugs)]/
+    as.numeric(bug.closed.series[nrow(bug.closed.series),date]-
+               bug.closed.series[1,date])
 @ 
   \end{center}
 Roughly \Sexpr{format(bugs.closed.per.day,digits=1)} bugs are closed per day.
 \end{frame}
 
 \subsection{RC Bugs}
-\begin{frame}
+\begin{frame}{RC Bugs in the Past Year}
   \begin{center}
-<<rc.bugs,fig=TRUE,echo=FALSE>>=
+<<rc.bugs,fig=TRUE,echo=FALSE,warning=FALSE,cache.extra=file.info("data/rc_bugs.txt")[,"mtime"]>>=
 rc.bugs <-
-  read.table(file="data/rc_bugs.txt",
-             header=TRUE,fill=TRUE)
-rc.bugs <- data.frame(rc.bugs)
-rc.bugs <- rc.bugs[,c(-5,-7)]
-rc.bugs$date <-
-  as.POSIXct(strptime(rc.bugs$date,
-                      format="%Y%m%d%H%M"))
+    data.table(read.table(file="data/rc_bugs.txt",
+                          header=TRUE,fill=TRUE))
+rc.bugs[,date:=
+             as.POSIXct(strptime(date,
+                                 format="%Y%m%d%H%M"))]
+rc.bugs[,unknown:=NULL]
+rc.bugs[,unknown.1:=NULL]
+for (i in 1:10) {  # fixed 10 passes of the same filter; presumably because removing a row can expose another large drop -- TODO confirm 10 is enough
+    rc.bugs <- rc.bugs[c(0,diff(total)) > -1000,]  # keep rows whose `total` did not fall by 1000 or more relative to the previous row (first row always kept)
+}
 rc.bugs.long <-
-    melt(rc.bugs,id="date")
-print(ggplot(rc.bugs.long[rc.bugs.long$date > 
-                          as.POSIXct(ISOdatetime(2013,08,1,0,0,0)),]
-             ,aes(x=date,y=value,color=variable))+
+    data.table(melt(rc.bugs,id="date"))
+print(ggplot(rc.bugs.long[date > 
+                          as.POSIXct(ISOdatetime(2016,08,1,0,0,0)),]
+            ,aes(x=date,y=value,color=variable))+
       geom_line()+
-      ggtitle("RC Bugs in the past year")+
-      ylab("RC Bugs")+xlab("Time")+
-      theme(legend.position=c(0.9,0.5))+
-      guides(color=guide_legend(title="Measure")))
-
+      ylab("# of Release Critical Bugs")+xlab("Time")+
+      theme(legend.position="top")+
+      scale_color_discrete("Measure"))
 @ 
 \end{center}
-  \setbeamercolor{postit}{fg=black,bg=yellow}
-  \begin{textblock}{4}(6,4)
-    \begin{onlyenv}<2>
-      \begin{beamercolorbox}[sep=1em,wd=5cm]{postit}
-        \centering \huge Too many RC bugs!
-      \end{beamercolorbox}
-    \end{onlyenv}
-  \end{textblock}
-
 \end{frame}
 
 \section{Debbugs Structure and Infrastructure}
@@ -330,15 +343,15 @@ print(ggplot(rc.bugs.long[rc.bugs.long$date >
   % SQL loading
   \begin{itemize}
   \item Load bugs
-\begin{lstlisting}[language=sh]
+\begin{minted}{sh}
 debbugs-loadsql bugs;
 debbugs-loadsql bugs archive;
-\end{lstlisting}
+\end{minted}
   \item Load Versioning information
-\begin{lstlisting}[language=sh]
+\begin{minted}{sh}
 debbugs-loadsql versions;
 debbugs-loadsql debinfo;
-\end{lstlisting}
+\end{minted}
   \end{itemize}
 \end{frame}