library(lattice)
library(xtable)
library(ggplot2)
+library("scales")
library(reshape2)
-opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio")
+library("data.table")
+opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio",fig.width=6,fig.height=4)
opts_chunk$set(cache=TRUE, autodep=TRUE)
-options(device = function(file, width = 8, height = 7, ...) {
+options(device = function(file, width = 6, height = 4, ...) {
cairo_pdf(tempfile(), width = width, height = height, ...)
})
@
+<<scientific_format,echo=FALSE>>=
+scientific_10 <- function(x) {
+ parse(text=gsub("e", " %*% 10^", scientific_format()(x)))
+}
+@
\begin{document}
\subsection{Bug Reporting Rate}
\begin{frame}{How many bugs do we have?}
\begin{center}
-<<bug.growth,fig=TRUE,echo=FALSE>>=
-bug.growth <- read.table("data/sorted_bug_growth_for_r_every_500.txt",stringsAsFactors=FALSE);
+<<bug.growth,fig=TRUE,echo=FALSE,cache.extra=file.info("data/sorted_bug_growth_for_r_every_500.txt")[,"mtime"]>>=
+bug.growth <- fread("data/sorted_bug_growth_for_r_every_500.txt")
colnames(bug.growth) <- c("time","bugs")
-bug.growth <- bug.growth[pmax(bug.growth$bugs) <= as.numeric(bug.growth$bugs),]
-bug.growth$date <-
- as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(bug.growth$time))
-print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0)),],aes(x=date,y=bugs))+
+bug.growth <- bug.growth[pmax(bugs) <= as.numeric(bugs),]
+bug.growth[,date:=
+ as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+
+ as.numeric(bug.growth$time))]
+print(ggplot(bug.growth[date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0)),],
+ aes(x=date,y=bugs))+
xlab("Time")+ylab("Bugs filed in Debian")+
- geom_point()+stat_smooth(method="lm")+
- ggtitle("Bug growth versus time"))
+ scale_y_continuous(labels = scientific_10)+
+ geom_point()+stat_smooth(method="lm"))
@
\end{center}
\end{frame}
\begin{columns}
\column{0.5\textwidth}
\begin{center}
-<<bugs.filed,fig=TRUE,echo=FALSE,warning=FALSE>>=
-print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2013,1,1,0,0,0)),],aes(x=date,y=bugs))+
+<<bugs.filed,fig=TRUE,echo=FALSE,warning=FALSE,fig.width=3,fig.height=3>>=
+print(ggplot(bug.growth[date > as.POSIXct(ISOdatetime(2015,1,1,0,0,0)),],
+ aes(x=date,y=bugs))+
xlab("Time")+ylab("Bugs filed in Debian")+
- geom_point()+stat_smooth(method="lm")+
- ggtitle("Christian Perrier's Plot"))
+ scale_y_continuous(labels = scientific_10)+
+ geom_point()+stat_smooth(method="lm"))
bugs.filed.per.day <-
lm(bugs~date,bug.growth)$coeff[2]*3600*24
-temp.lm <- lm(date~bugs,bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2013,1,1,0,0,0)),])
-bug.760000 <-
- as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=760000,date=NA))))
-bug.800000 <-
- as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=800000,date=NA))))
+temp.lm <- lm(date~bugs,bug.growth[date > as.POSIXct(ISOdatetime(2015,1,1,0,0,0)),])
+bug.880000 <-
+ as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=880000,date=NA))))
+bug.900000 <-
+ as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=900000,date=NA))))
+bug.1000000 <-
+ as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=1000000,date=NA))))
@
\end{center}
\column{0.5\textwidth}
Roughly \Sexpr{format(bugs.filed.per.day,digits=1)} bugs are filed per
-day. The 760000th bug will be filed at \Sexpr{bug.760000}, and the
-800000th bug will be filed at \Sexpr{bug.800000}.
+day. The 880000th bug will be filed at \Sexpr{bug.880000}, the
+900000th bug will be filed at \Sexpr{bug.900000}, and bug one million will be filed at
+\Sexpr{bug.1000000}.
\end{columns}
\end{frame}
\subsection{Bug Fixing Rate}
\begin{frame}
\begin{center}
-<<bugs.closed,fig=TRUE,echo=FALSE,width=7,height=5,warning=FALSE,error=FALSE>>=
-bug.closed.series <- read.table(file="data/bug_closed_time_series.txt")
+<<bugs.closed,fig=TRUE,echo=FALSE,width=7,height=5,warning=FALSE,error=FALSE,cache.extra=file.info("data/bug_closed_time_series.txt")[,"mtime"]>>=
+bug.closed.series <- fread(file="data/bug_closed_time_series.txt")
colnames(bug.closed.series) <- c("archived.bugs","year.week")
-bug.closed.series$week <-
- gsub("(\\d+)-(\\d+)","\\2",bug.closed.series$year.week)
-bug.closed.series$year <-
- gsub("(\\d+)-(\\d+)","\\1",bug.closed.series$year.week)
-bug.closed.series$doy <-
- as.numeric(bug.closed.series$week)*7
-bug.closed.series$year.doy <-
- paste(sep="-",bug.closed.series$year,bug.closed.series$doy)
-bug.closed.series$date <-
- as.POSIXct(strptime(bug.closed.series$year.doy,
- format="%Y-%j"))
+bug.closed.series[,week:=
+ gsub("(\\d+)-(\\d+)","\\2",year.week)]
+bug.closed.series[,year:=
+ gsub("(\\d+)-(\\d+)","\\1",year.week)]
+bug.closed.series[,doy:= as.numeric(week)*7]
+bug.closed.series[,year.doy:=
+ paste(sep="-",year,doy)]
+bug.closed.series[,date:=
+ as.POSIXct(strptime(year.doy,
+ format="%Y-%j"))]
# bug.closed.ts <- ts(bug.closed.series[,1],start=1,frequency=7)
-print(ggplot(bug.closed.series[bug.closed.series$date > as.POSIXct(ISOdatetime(2008,1,1,0,0,0)),],
+print(ggplot(bug.closed.series[date > as.POSIXct(ISOdatetime(2008,1,1,0,0,0)),],
aes(x=date,y=archived.bugs/7))+geom_line()+stat_smooth(method="lm")+
ylab("Bugs archived Per Day")+xlab("Time"))
-bugs.closed.per.day <-
- sum(bug.closed.series$archived.bugs)/
- as.numeric(bug.closed.series$date[nrow(bug.closed.series)]-bug.closed.series$date[1])
+bugs.closed.per.day <-
+ bug.closed.series[,sum(archived.bugs)]/
+ as.numeric(bug.closed.series[nrow(bug.closed.series),date]-
+ bug.closed.series[1,date])
@
\end{center}
Roughly \Sexpr{format(bugs.closed.per.day,digits=1)} bugs are closed per day.
\end{frame}
\subsection{RC Bugs}
-\begin{frame}
+\begin{frame}{RC Bugs in the Past Year}
\begin{center}
-<<rc.bugs,fig=TRUE,echo=FALSE>>=
+<<rc.bugs,fig=TRUE,echo=FALSE,warning=FALSE,cache.extra=file.info("data/rc_bugs.txt")[,"mtime"]>>=
rc.bugs <-
- read.table(file="data/rc_bugs.txt",
- header=TRUE,fill=TRUE)
-rc.bugs <- data.frame(rc.bugs)
-rc.bugs <- rc.bugs[,c(-5,-7)]
-rc.bugs$date <-
- as.POSIXct(strptime(rc.bugs$date,
- format="%Y%m%d%H%M"))
+ data.table(read.table(file="data/rc_bugs.txt",
+ header=TRUE,fill=TRUE))
+rc.bugs[,date:=
+ as.POSIXct(strptime(date,
+ format="%Y%m%d%H%M"))]
+rc.bugs[,unknown:=NULL]
+rc.bugs[,unknown.1:=NULL]
+for (i in 1:10) {
+ rc.bugs <- rc.bugs[c(0,diff(total)) > -1000,]
+}
rc.bugs.long <-
- melt(rc.bugs,id="date")
-print(ggplot(rc.bugs.long[rc.bugs.long$date >
- as.POSIXct(ISOdatetime(2013,08,1,0,0,0)),]
- ,aes(x=date,y=value,color=variable))+
+ data.table(melt(rc.bugs,id="date"))
+print(ggplot(rc.bugs.long[date >
+ as.POSIXct(ISOdatetime(2016,08,1,0,0,0)),]
+ ,aes(x=date,y=value,color=variable))+
geom_line()+
- ggtitle("RC Bugs in the past year")+
- ylab("RC Bugs")+xlab("Time")+
- theme(legend.position=c(0.9,0.5))+
- guides(color=guide_legend(title="Measure")))
-
+ ylab("# of Release Critical Bugs")+xlab("Time")+
+ theme(legend.position="top")+
+ scale_color_discrete("Measure"))
@
\end{center}
\setbeamercolor{postit}{fg=black,bg=yellow}