# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
#
-# Create a figure with the NeuroDebian repo subscription stats from the apache logs
-# Requires out put of
-# zgrep "GET /lists/[-a-z\.]\+ HTTP" neuro.debian.net-*access.log* | sed -e 's,[^:]*:\([0-9\.]\+\).*\[\(.*\):.*:.*:.*/lists/\(.*\) HTTP.*,\2;\3;\1,' -e 's,/, ,g'
-# either from a file or on stdin. Needs output filename as the only argument
-
import fileinput
import sys
+import time
from datetime import datetime
-import numpy as np
-import matplotlib
-matplotlib.use('Agg')
-import pylab as pl
-from matplotlib.dates import date2num, num2date
-from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
-from matplotlib.font_manager import FontProperties
-from ConfigParser import SafeConfigParser
-from math import ceil
-
-dt = [('ip', '|S16'),
- ('loc', '|S3'),
- ('suite', '|S20'),
- ('date', float)]
-
-
-def make_figure(data, ymax=None):
- fig = pl.figure(figsize=(14,3))
- distros = ('Debian', 'Ubuntu')
- # Sorting is actually seems to be not needed on Python 2.7
- # which probably returns release codenames in the order as
- # in the config file which is already correct
- # But since our server is still on previous stable release
- # let's sort for now explicitly
- # 9999 for 'nd' == 'sid'
- sorting_ids = dict([(x[0], len(x[1])>2 and float(x[1][2:]) or 9999)
- for x in cfg.items('release backport ids')])
- for idistro, distro in enumerate(distros):
- ax = fig.add_subplot(1, len(distros), idistro+1)
- suites = [code for code in cfg.options('release codenames')
- if cfg.get('release codenames', code).count(distro)]
- # sort suites according to backport ids
- # and in reverse order so the freshiest is on top
- suites = sorted(suites,
- cmp=lambda x,y: cmp(sorting_ids[x], sorting_ids[y]),
- reverse=True)
- plot_datehist(ax, data, 10, suites, title=distro, ymax=ymax)
- fig.autofmt_xdate()
- return fig
-
+import re
+import json
+import operator
-def plot_datehist(ax, data, bins, suites, title=None, ymax=None):
- colors=['#ff0088', '#20435C', '#45902C', '#E08720']
- linestyle=['-', '--']
- global_x_max = None
- global_x_min = None
- global_y_max = None
- for i, suite in enumerate(suites):
- dates = data['date'][data['suite'] == suite]
- # history in days
- history_length = dates.max() - dates.min()
- # make approx monthly bins, smaller bins yield spiky curves
- # needs new=True to work with oldish numpy
- (hist, bin_edges) = np.histogram(dates, np.ceil(history_length/30.))
- if False:
- # debug output ;-)
- print dates.min(), num2date(dates.min()), dates.max(), \
- num2date(dates.max()), history_length
- print bin_edges
- if len(bin_edges) < 2:
- # protect against single data point entries by ignoring them
- # wouldn't be able to draw a line anyway ;-)
- continue
- width = bin_edges[1] - bin_edges[0]
- # think lines
- y = hist / width
- global_y_max = max(max(y), global_y_max)
- ax.plot(bin_edges[:-1]+(width/2), y,
- label=suite, color=colors[i%4], linestyle=linestyle[i//4], lw=2)
- # transparent curve shading
- ax.fill_between(bin_edges[:-1]+(width/2), 0, hist / width, alpha=0.2,
- label=suite, color=colors[i%4])
- # figure out axis limits to avoid whitespace in plots
- x_max = bin_edges[-2] + width/2
- x_min = bin_edges[0] + width/2
-
- global_x_max = max(x_max, global_x_max)
- if global_x_min is None or x_min < global_x_min:
- global_x_min = x_min
-
- ax.set_xlim(global_x_min, global_x_max)
- ax.set_ylabel('New subscriptions [1/day]')
- if title:
- ax.set_title(title)
- if not ymax:
- # Always leave significant 5% for improvement ;-)
- ymax = global_y_max * 1.05
- ax.set_ylim(0, ymax)
- # set x-ticks in date
- # see: http://matplotlib.sourceforge.net/examples/api/date_demo.html
- ax.xaxis.set_major_locator(YearLocator())
- ax.xaxis.set_major_formatter(DateFormatter('\n\n%Y'))
- ax.xaxis.set_minor_locator(MonthLocator(interval=2))
- ax.xaxis.set_minor_formatter(DateFormatter('%b'))
- # format the coords message box
- ax.format_xdata = DateFormatter('%Y-%m-%d')
- ax.grid(True)
- # pukes with old matplotlib
- #font = FontProperties()
- #font.set_size = 8
- pl.legend(loc='upper left', #prop=font,
- labelspacing=.2, borderaxespad=.2,
- handletextpad=.2, borderpad=.2)
+releases = {
+ 'etch': 'Debian GNU/Linux 4.0 (etch)',
+ 'lenny': 'Debian GNU/Linux 5.0 (lenny)',
+ 'squeeze': 'Debian GNU/Linux 6.0 (squeeze)',
+ 'wheezy': 'Debian GNU/Linux 7.0 (wheezy)',
+ 'jessie': 'Debian testing (jessie)',
+ 'sid': 'Debian unstable (sid)',
+ 'hardy': 'Ubuntu 08.04 LTS "Hardy Heron" (hardy)',
+ 'jaunty': 'Ubuntu 09.04 "Jaunty Jackalope" (jaunty)',
+ 'karmic': 'Ubuntu 09.10 "Karmic Koala" (karmic)',
+ 'lucid': 'Ubuntu 10.04 LTS "Lucid Lynx" (lucid)',
+ 'maverick': 'Ubuntu 10.10 "Maverick Meerkat" (maverick)',
+ 'natty': 'Ubuntu 11.04 "Natty Narwhal" (natty)',
+ 'oneiric': 'Ubuntu 11.10 "Oneiric Ocelot" (oneiric)',
+ 'precise': 'Ubuntu 12.04 LTS "Precise Pangolin" (precise)',
+ 'quantal': 'Ubuntu 12.10 "Quantal Quetzal" (quantal)',
+ 'raring': 'Ubuntu 13.04 "Raring Ringtail" (raring)',
+ 'saucy': 'Ubuntu 13.10 "Saucy Salamander" (saucy)',
+}
if __name__ == '__main__':
- if not len(sys.argv) > 1:
- print 'Need output filename.'
- sys.exit(1)
- cfg_path="/home/neurodebian/neurodebian.git/neurodebian.cfg"
- #cfg_path="../neurodebian.cfg"
- cfg = SafeConfigParser()
- cfg.read(cfg_path)
- data = []
- for line in fileinput.FileInput(sys.argv[2:], openhook=fileinput.hook_compressed):
- date, list_, ip = line.split(';')
+ data = {}
+ # get the IP, date and target release
+ # the date is truncated to a month/year combo
+ listget = re.compile(r'^([0-9.:]*) .*\[([^:]*).*GET /lists/([a-z]*)')
+ for line in fileinput.FileInput(openhook=fileinput.hook_compressed):
+ match = listget.match(line)
+ if not match:
+ continue
+ addr, date, release = match.groups()
+ if not release in releases:
+ # ignore fantasy names
+ continue
+ date = datetime.strptime(date, '%d/%b/%Y')
+ # truncate to a week
try:
- suite, loc = list_.split('.')
+ date = datetime(date.year, date.month, date.day / 7 * 7 + 1)
except ValueError:
- suite = list_
- loc = ''
- date = datetime.strptime(date, "%d %b %Y")
- data.append((ip.strip(), loc, suite, date2num(date)))
- data = np.array(data, dtype=dt)
- make_figure(data).savefig(sys.argv[1], bbox_inches='tight', dpi=60)
+ # only on Feb28...
+ date = datetime(date.year, date.month, date.day / 7 * 7)
+ # microseconds since epoch
+ date = int(time.mktime(date.timetuple()) * 1000)
+ rstats = data.setdefault(releases[release], {})
+ rtime = rstats.setdefault(date, 0)
+ rtime += 1
+ rstats[date] = rtime
+ data[releases[release]] = rstats
+ # determine the union of all timestamps
+ timestamps = set()
+ for codename, stats in data.iteritems():
+ timestamps.union_update(stats.keys())
+ export = [{'key': release,
+ 'values': [[ts, float(data[release].setdefault(ts, 0)) / 7]
+ for ts in sorted(timestamps)]}
+ for release in sorted(data)]
+ print json.dumps(export)