]> git.donarmstrong.com Git - debbugs-presentations.git/commitdiff
use scales and data.table
authorDon Armstrong <don@donarmstrong.com>
Tue, 8 Aug 2017 15:40:22 +0000 (08:40 -0700)
committerDon Armstrong <don@donarmstrong.com>
Tue, 8 Aug 2017 15:40:22 +0000 (08:40 -0700)
debbugs.Rnw

index 1f233b095a124dbdaaa09cc5322ec533b3dd1c3e..7d059c7fcfc515f81c8d2670cb967cf297182c07 100644 (file)
 library(lattice)
 library(xtable)
 library(ggplot2)
+library("scales")
 library(reshape2)
-opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio")
+library("data.table")
+opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio",fig.width=6,fig.height=4)
 opts_chunk$set(cache=TRUE, autodep=TRUE)
-options(device = function(file, width = 8, height = 7, ...) {
+options(device = function(file, width = 6, height = 4, ...) {
   cairo_pdf(tempfile(), width = width, height = height, ...)
 })
 
 @ 
 
+<<scientific_format,echo=FALSE>>=
+scientific_10 <- function(x) {  # axis-label helper (passed as labels= to scale_y_continuous in the plots): formats x in scientific notation and returns parsed expressions
+  parse(text=gsub("e", " %*% 10^", scientific_format()(x)))  # rewrite each "e" exponent marker as the plotmath product " %*% 10^" before parsing
+}
+@ 
 
 \begin{document}
 
@@ -165,16 +172,18 @@ options(device = function(file, width = 8, height = 7, ...) {
 \subsection{Bug Reporting Rate}
 \begin{frame}{How many bugs do we have?}
   \begin{center}
-<<bug.growth,fig=TRUE,echo=FALSE>>=
-bug.growth <- read.table("data/sorted_bug_growth_for_r_every_500.txt",stringsAsFactors=FALSE);
+<<bug.growth,fig=TRUE,echo=FALSE,cache.extra=file.info("data/sorted_bug_growth_for_r_every_500.txt")[,"mtime"]>>=
+bug.growth <- fread("data/sorted_bug_growth_for_r_every_500.txt")  # load the two-column (time, bugs) table that this chunk plots
 colnames(bug.growth) <- c("time","bugs")
-bug.growth <- bug.growth[pmax(bug.growth$bugs) <= as.numeric(bug.growth$bugs),]
-bug.growth$date <- 
-  as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(bug.growth$time))
-print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0)),],aes(x=date,y=bugs))+
+bug.growth <- bug.growth[cummax(as.numeric(bugs)) <= as.numeric(bugs),]  # keep only rows at the running maximum of bugs; one-argument pmax() returned bugs itself, so no out-of-order row was ever removed
+bug.growth[,date:=
+                as.POSIXct(ISOdatetime(1970,1,1,0,0,0,tz="UTC")+  # explicit UTC origin: the default tz="" anchors the origin at local midnight and shifts every date by the UTC offset
+                           as.numeric(bug.growth$time))]  # assumes time holds seconds since the Unix epoch -- TODO confirm against the data file
+print(ggplot(bug.growth[date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0)),],  # scatter plot with a linear fit, restricted to rows dated after 2005-01-01
+             aes(x=date,y=bugs))+
       xlab("Time")+ylab("Bugs filed in Debian")+
-      geom_point()+stat_smooth(method="lm")+
-      ggtitle("Bug growth versus time"))
+      scale_y_continuous(labels = scientific_10)+
+      geom_point()+stat_smooth(method="lm"))
 @ 
 \end{center}
 \end{frame}
@@ -183,24 +192,28 @@ print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0)
   \begin{columns}
     \column{0.5\textwidth}
   \begin{center}
-<<bugs.filed,fig=TRUE,echo=FALSE,warning=FALSE>>=
-print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2013,1,1,0,0,0)),],aes(x=date,y=bugs))+
+<<bugs.filed,fig=TRUE,echo=FALSE,warning=FALSE,fig.width=3,fig.height=3>>=
+print(ggplot(bug.growth[date > as.POSIXct(ISOdatetime(2015,1,1,0,0,0)),],  # same scatter plot and linear fit as the previous frame, restricted to rows dated after 2015-01-01
+             aes(x=date,y=bugs))+
       xlab("Time")+ylab("Bugs filed in Debian")+
-      geom_point()+stat_smooth(method="lm")+
-      ggtitle("Christian Perrier's Plot"))
+      scale_y_continuous(labels = scientific_10)+
+      geom_point()+stat_smooth(method="lm"))
 bugs.filed.per.day <-
   lm(bugs~date,bug.growth)$coeff[2]*3600*24
-temp.lm <- lm(date~bugs,bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2013,1,1,0,0,0)),])
-bug.760000 <- 
-    as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=760000,date=NA))))
-bug.800000 <- 
-    as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=800000,date=NA))))
+temp.lm <- lm(date~bugs,bug.growth[date > as.POSIXct(ISOdatetime(2015,1,1,0,0,0)),])  # regress date on bug number (post-2015 rows) so a bug number can be turned into a predicted date
+bug.880000 <- 
+    as.POSIXct(ISOdatetime(1970,1,1,0,0,0,tz="UTC")+as.numeric(predict(temp.lm,data.frame(bugs=880000,date=NA))))  # predict() yields seconds since the UTC epoch, so the origin must be UTC too (the default tz="" shifted the forecast by the UTC offset)
+bug.900000 <- 
+    as.POSIXct(ISOdatetime(1970,1,1,0,0,0,tz="UTC")+as.numeric(predict(temp.lm,data.frame(bugs=900000,date=NA))))  # same conversion for bug 900000
+bug.1000000 <- 
+    as.POSIXct(ISOdatetime(1970,1,1,0,0,0,tz="UTC")+as.numeric(predict(temp.lm,data.frame(bugs=1000000,date=NA))))  # same conversion for bug 1000000; the resulting dates print in UTC
 @ 
 \end{center}
 \column{0.5\textwidth}
 Roughly \Sexpr{format(bugs.filed.per.day,digits=1)} bugs are filed per
-day. The 760000th bug will be filed at \Sexpr{bug.760000}, and the
-800000th bug will be filed at \Sexpr{bug.800000}.
+day. The 880000th bug will be filed at \Sexpr{bug.880000}, the
+900000th bug will be filed at \Sexpr{bug.900000}, and bug one million will be filed at
+\Sexpr{bug.1000000}.
 \end{columns}
 \end{frame}
 
@@ -208,57 +221,58 @@ day. The 760000th bug will be filed at \Sexpr{bug.760000}, and the
 \subsection{Bug Fixing Rate}
 \begin{frame}
   \begin{center}
-<<bugs.closed,fig=TRUE,echo=FALSE,width=7,height=5,warning=FALSE,error=FALSE>>=
-bug.closed.series <- read.table(file="data/bug_closed_time_series.txt")
+<<bugs.closed,fig=TRUE,echo=FALSE,width=7,height=5,warning=FALSE,error=FALSE,cache.extra=file.info("data/bug_closed_time_series.txt")[,"mtime"]>>=
+bug.closed.series <- fread(file="data/bug_closed_time_series.txt")  # load the archived-bug counts; the two columns are named archived.bugs and year.week below
 colnames(bug.closed.series) <- c("archived.bugs","year.week")
-bug.closed.series$week <-
-    gsub("(\\d+)-(\\d+)","\\2",bug.closed.series$year.week)
-bug.closed.series$year <-
-    gsub("(\\d+)-(\\d+)","\\1",bug.closed.series$year.week)
-bug.closed.series$doy <- 
-    as.numeric(bug.closed.series$week)*7
-bug.closed.series$year.doy <- 
-    paste(sep="-",bug.closed.series$year,bug.closed.series$doy)
-bug.closed.series$date <-
-  as.POSIXct(strptime(bug.closed.series$year.doy,
-                      format="%Y-%j"))
+bug.closed.series[,week:=
+    gsub("(\\d+)-(\\d+)","\\2",year.week)]  # second number of the "N-N" year.week string
+bug.closed.series[,year:=
+    gsub("(\\d+)-(\\d+)","\\1",year.week)]  # first number of the "N-N" year.week string
+bug.closed.series[,doy:= as.numeric(week)*7]  # approximate day of year: week number times seven
+bug.closed.series[,year.doy:=
+                       paste(sep="-",year,doy)]
+bug.closed.series[,date:=
+                       as.POSIXct(strptime(year.doy,
+                                           format="%Y-%j"))]  # parse "year-dayofyear" into a POSIXct date
 
 # bug.closed.ts <- ts(bug.closed.series[,1],start=1,frequency=7)
 
-print(ggplot(bug.closed.series[bug.closed.series$date > as.POSIXct(ISOdatetime(2008,1,1,0,0,0)),],
+print(ggplot(bug.closed.series[date > as.POSIXct(ISOdatetime(2008,1,1,0,0,0)),],
               aes(x=date,y=archived.bugs/7))+geom_line()+stat_smooth(method="lm")+
       ylab("Bugs archived Per Day")+xlab("Time"))
-bugs.closed.per.day <- 
-    sum(bug.closed.series$archived.bugs)/
-    as.numeric(bug.closed.series$date[nrow(bug.closed.series)]-bug.closed.series$date[1])
+bugs.closed.per.day <-
+    bug.closed.series[,sum(archived.bugs)]/
+    as.numeric(difftime(bug.closed.series[nrow(bug.closed.series),date],
+                        bug.closed.series[1,date],units="days"))  # total archived bugs divided by the first-to-last row span; units are pinned to days because subtracting POSIXct values picks its units automatically
 @ 
   \end{center}
 Roughly \Sexpr{format(bugs.closed.per.day,digits=1)} bugs are closed per day.
 \end{frame}
 
 \subsection{RC Bugs}
-\begin{frame}
+\begin{frame}{RC Bugs in the Past Year}
   \begin{center}
-<<rc.bugs,fig=TRUE,echo=FALSE>>=
+<<rc.bugs,fig=TRUE,echo=FALSE,warning=FALSE,cache.extra=file.info("data/rc_bugs.txt")[,"mtime"]>>=
 rc.bugs <-
-  read.table(file="data/rc_bugs.txt",
-             header=TRUE,fill=TRUE)
-rc.bugs <- data.frame(rc.bugs)
-rc.bugs <- rc.bugs[,c(-5,-7)]
-rc.bugs$date <-
-  as.POSIXct(strptime(rc.bugs$date,
-                      format="%Y%m%d%H%M"))
+    data.table(read.table(file="data/rc_bugs.txt",
+                          header=TRUE,fill=TRUE))  # read the RC-bug table with its header row; fill=TRUE pads short rows
+rc.bugs[,date:=
+             as.POSIXct(strptime(date,
+                                 format="%Y%m%d%H%M"))]  # parse the YYYYmmddHHMM stamp into POSIXct, replacing the date column
+rc.bugs[,unknown:=NULL]  # drop the unknown column (presumably one of the two columns the old code removed by position -- verify)
+rc.bugs[,unknown.1:=NULL]  # drop the unknown.1 column likewise
+for (i in 1:10) {  # ten filtering passes: removing a row changes its neighbour's difference, so one pass may not be enough
+    rc.bugs <- rc.bugs[c(0,diff(total)) > -1000,]  # drop rows whose total fell by 1000 or more from the previous row (assumes the file has a total column)
+}
 rc.bugs.long <-
-    melt(rc.bugs,id="date")
-print(ggplot(rc.bugs.long[rc.bugs.long$date > 
-                          as.POSIXct(ISOdatetime(2013,08,1,0,0,0)),]
-             ,aes(x=date,y=value,color=variable))+
+    data.table(melt(rc.bugs,id="date"))  # long format: one row per (date, variable, value)
+print(ggplot(rc.bugs.long[date > 
+                          as.POSIXct(ISOdatetime(2016,08,1,0,0,0)),]
+            ,aes(x=date,y=value,color=variable))+
       geom_line()+
-      ggtitle("RC Bugs in the past year")+
-      ylab("RC Bugs")+xlab("Time")+
-      theme(legend.position=c(0.9,0.5))+
-      guides(color=guide_legend(title="Measure")))
-
+      ylab("# of Release Critical Bugs")+xlab("Time")+
+      theme(legend.position="top")+
+      scale_color_discrete("Measure"))
 @ 
 \end{center}
   \setbeamercolor{postit}{fg=black,bg=yellow}