From 830b85cabb4af497cc9439d4f0e69c2609ad86ab Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Tue, 8 Aug 2017 08:40:22 -0700 Subject: [PATCH] use scales and data.table --- debbugs.Rnw | 126 +++++++++++++++++++++++++++++----------------------- 1 file changed, 70 insertions(+), 56 deletions(-) diff --git a/debbugs.Rnw b/debbugs.Rnw index 1f233b0..7d059c7 100644 --- a/debbugs.Rnw +++ b/debbugs.Rnw @@ -108,15 +108,22 @@ library(lattice) library(xtable) library(ggplot2) +library("scales") library(reshape2) -opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio") +library("data.table") +opts_chunk$set(dev="cairo_pdf",out.width="\\textwidth",out.height="0.8\\textheight",out.extra="keepaspectratio",fig.width=6,fig.height=4) opts_chunk$set(cache=TRUE, autodep=TRUE) -options(device = function(file, width = 8, height = 7, ...) { +options(device = function(file, width = 6, height = 4, ...) { cairo_pdf(tempfile(), width = width, height = height, ...) }) @ +<>= +scientific_10 <- function(x) { + parse(text=gsub("e", " %*% 10^", scientific_format()(x))) +} +@ \begin{document} @@ -165,16 +172,18 @@ options(device = function(file, width = 8, height = 7, ...) 
{ \subsection{Bug Reporting Rate} \begin{frame}{How many bugs do we have?} \begin{center} -<>= -bug.growth <- read.table("data/sorted_bug_growth_for_r_every_500.txt",stringsAsFactors=FALSE); +<>= +bug.growth <- fread("data/sorted_bug_growth_for_r_every_500.txt") colnames(bug.growth) <- c("time","bugs") -bug.growth <- bug.growth[pmax(bug.growth$bugs) <= as.numeric(bug.growth$bugs),] -bug.growth$date <- - as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(bug.growth$time)) -print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0)),],aes(x=date,y=bugs))+ +bug.growth <- bug.growth[cummax(as.numeric(bugs)) <= as.numeric(bugs),] +bug.growth[,date:= + as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+ + as.numeric(bug.growth$time))] +print(ggplot(bug.growth[date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0)),], + aes(x=date,y=bugs))+ xlab("Time")+ylab("Bugs filed in Debian")+ - geom_point()+stat_smooth(method="lm")+ - ggtitle("Bug growth versus time")) + scale_y_continuous(labels = scientific_10)+ + geom_point()+stat_smooth(method="lm")) @ \end{center} \end{frame} @@ -183,24 +192,28 @@ print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2005,1,1,0,0,0) \begin{columns} \column{0.5\textwidth} \begin{center} -<>= -print(ggplot(bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2013,1,1,0,0,0)),],aes(x=date,y=bugs))+ +<>= +print(ggplot(bug.growth[date > as.POSIXct(ISOdatetime(2015,1,1,0,0,0)),], + aes(x=date,y=bugs))+ xlab("Time")+ylab("Bugs filed in Debian")+ - geom_point()+stat_smooth(method="lm")+ - ggtitle("Christian Perrier's Plot")) + scale_y_continuous(labels = scientific_10)+ + geom_point()+stat_smooth(method="lm")) bugs.filed.per.day <- lm(bugs~date,bug.growth)$coeff[2]*3600*24 -temp.lm <- lm(date~bugs,bug.growth[bug.growth$date > as.POSIXct(ISOdatetime(2013,1,1,0,0,0)),]) -bug.760000 <- - as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=760000,date=NA)))) -bug.800000 <- - 
as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=800000,date=NA)))) +temp.lm <- lm(date~bugs,bug.growth[date > as.POSIXct(ISOdatetime(2015,1,1,0,0,0)),]) +bug.880000 <- + as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=880000,date=NA)))) +bug.900000 <- + as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=900000,date=NA)))) +bug.1000000 <- + as.POSIXct(ISOdatetime(1970,1,1,0,0,0)+as.numeric(predict(temp.lm,data.frame(bugs=1000000,date=NA)))) @ \end{center} \column{0.5\textwidth} Roughly \Sexpr{format(bugs.filed.per.day,digits=1)} bugs are filed per -day. The 760000th bug will be filed at \Sexpr{bug.760000}, and the -800000th bug will be filed at \Sexpr{bug.800000}. +day. The 880000th bug will be filed at \Sexpr{bug.880000}, the +900000th bug will be filed at \Sexpr{bug.900000}, and bug one million will be filed at +\Sexpr{bug.1000000}. \end{columns} \end{frame} @@ -208,57 +221,58 @@ day. The 760000th bug will be filed at \Sexpr{bug.760000}, and the \subsection{Bug Fixing Rate} \begin{frame} \begin{center} -<>= -bug.closed.series <- read.table(file="data/bug_closed_time_series.txt") +<>= +bug.closed.series <- fread(file="data/bug_closed_time_series.txt") colnames(bug.closed.series) <- c("archived.bugs","year.week") -bug.closed.series$week <- - gsub("(\\d+)-(\\d+)","\\2",bug.closed.series$year.week) -bug.closed.series$year <- - gsub("(\\d+)-(\\d+)","\\1",bug.closed.series$year.week) -bug.closed.series$doy <- - as.numeric(bug.closed.series$week)*7 -bug.closed.series$year.doy <- - paste(sep="-",bug.closed.series$year,bug.closed.series$doy) -bug.closed.series$date <- - as.POSIXct(strptime(bug.closed.series$year.doy, - format="%Y-%j")) +bug.closed.series[,week:= + gsub("(\\d+)-(\\d+)","\\2",year.week)] +bug.closed.series[,year:= + gsub("(\\d+)-(\\d+)","\\1",year.week)] +bug.closed.series[,doy:= as.numeric(week)*7] +bug.closed.series[,year.doy:= + paste(sep="-",year,doy)] 
+bug.closed.series[,date:= + as.POSIXct(strptime(year.doy, + format="%Y-%j"))] # bug.closed.ts <- ts(bug.closed.series[,1],start=1,frequency=7) -print(ggplot(bug.closed.series[bug.closed.series$date > as.POSIXct(ISOdatetime(2008,1,1,0,0,0)),], +print(ggplot(bug.closed.series[date > as.POSIXct(ISOdatetime(2008,1,1,0,0,0)),], aes(x=date,y=archived.bugs/7))+geom_line()+stat_smooth(method="lm")+ ylab("Bugs archived Per Day")+xlab("Time")) -bugs.closed.per.day <- - sum(bug.closed.series$archived.bugs)/ - as.numeric(bug.closed.series$date[nrow(bug.closed.series)]-bug.closed.series$date[1]) +bugs.closed.per.day <- + bug.closed.series[,sum(archived.bugs)]/ + as.numeric(bug.closed.series[nrow(bug.closed.series),date]- + bug.closed.series[1,date]) @ \end{center} Roughly \Sexpr{format(bugs.closed.per.day,digits=1)} bugs are closed per day. \end{frame} \subsection{RC Bugs} -\begin{frame} +\begin{frame}{RC Bugs in the Past Year} \begin{center} -<>= +<>= rc.bugs <- - read.table(file="data/rc_bugs.txt", - header=TRUE,fill=TRUE) -rc.bugs <- data.frame(rc.bugs) -rc.bugs <- rc.bugs[,c(-5,-7)] -rc.bugs$date <- - as.POSIXct(strptime(rc.bugs$date, - format="%Y%m%d%H%M")) + data.table(read.table(file="data/rc_bugs.txt", + header=TRUE,fill=TRUE)) +rc.bugs[,date:= + as.POSIXct(strptime(date, + format="%Y%m%d%H%M"))] +rc.bugs[,unknown:=NULL] +rc.bugs[,unknown.1:=NULL] +for (i in 1:10) { + rc.bugs <- rc.bugs[c(0,diff(total)) > -1000,] +} rc.bugs.long <- - melt(rc.bugs,id="date") -print(ggplot(rc.bugs.long[rc.bugs.long$date > - as.POSIXct(ISOdatetime(2013,08,1,0,0,0)),] - ,aes(x=date,y=value,color=variable))+ + data.table(melt(rc.bugs,id="date")) +print(ggplot(rc.bugs.long[date > + as.POSIXct(ISOdatetime(2016,08,1,0,0,0)),] + ,aes(x=date,y=value,color=variable))+ geom_line()+ - ggtitle("RC Bugs in the past year")+ - ylab("RC Bugs")+xlab("Time")+ - theme(legend.position=c(0.9,0.5))+ - guides(color=guide_legend(title="Measure"))) - + ylab("# of Release Critical Bugs")+xlab("Time")+ + 
theme(legend.position="top")+ + scale_color_discrete("Measure")) @ \end{center} \setbeamercolor{postit}{fg=black,bg=yellow} -- 2.39.2