#!/usr/bin/python
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# vi: set ft=python sts=4 ts=4 sw=4 et:
#
-# Create a figure with the NeuroDebian repo subscription stats from the apache logs
-# Requires out put of
-# zgrep "GET /lists/[a-z\.]\+ HTTP" neuro.debian.net-*access*gz | sed -e 's,.*gz:\([0-9\.]\+\).*\[\(.*\):.*:.*:.*/lists/\(.*\) HTTP.*,\2;\3;\1,' -e 's,/, ,g'
-# either from a file or on stdin. Needs output filename as the only argument
-
import fileinput
import sys
+import time
from datetime import datetime
-import numpy as np
-import matplotlib
-matplotlib.use('Agg')
-import pylab as pl
-from matplotlib.dates import date2num
-from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
-from matplotlib.font_manager import FontProperties
-from ConfigParser import SafeConfigParser
-
-
-dt = [('ip', '|S16'),
- ('loc', '|S3'),
- ('suite', '|S20'),
- ('date', float)]
-
-
-def make_figure(data, ymax):
- fig = pl.figure(figsize=(14,3))
- ax = fig.add_subplot(121)
- plot_datehist(ax, data, 10, [code for code in cfg.options('release codenames') if cfg.get('release codenames', code).count('Debian')], title="Debian", ymax=ymax)
- ax = fig.add_subplot(122)
- plot_datehist(ax, data, 10, [code for code in cfg.options('release codenames') if cfg.get('release codenames', code).count('Ubuntu')], title="Ubuntu", ymax=ymax)
- fig.autofmt_xdate()
- return fig
+import re
+import sets
+import json
+import operator
-def plot_datehist(ax, data, bins, suites, title=None, ymax=None):
- colors=['#ff0088', '#20435C', '#45902C', '#E08720']
- linestyle=['-', '--']
- global_x_max = None
- global_x_min = None
- global_y_max = None
- for i, suite in enumerate(suites):
- dates = data['date'][data['suite'] == suite]
- # history in days
- history_length = dates.max() - dates.min()
- # make approx monthly bins, smaller bins yield spiky curves
- # needs new=True to work with oldish numpy
- (hist, bin_edges) = np.histogram(dates, np.ceil(history_length/30.), new=True)
- if len(bin_edges) < 2:
- # protect against single data point entries by ignoring them
- # wouldn't be able to draw a line anyway ;-)
- continue
- width = bin_edges[1] - bin_edges[0]
- # think lines
- ax.plot(bin_edges[:-1]+(width/2), hist / width,
- label=suite, color=colors[i%4], linestyle=linestyle[i//4], lw=2)
- # transparent curve shading
- ax.fill_between(bin_edges[:-1]+(width/2), 0, hist / width, alpha=0.2,
- label=suite, color=colors[i%4])
- # figure out axis limits to avoid whitespace in plots
- x_max = bin_edges[-2] + width/2
- x_min = bin_edges[0] + width/2
- if global_x_max is None or x_max > global_x_max:
- global_x_max = x_max
- if global_x_min is None or x_min < global_x_min:
- global_x_min = x_min
-
- ax.set_xlim(global_x_min, global_x_max)
- ax.set_ylabel('New subscriptions [1/day]')
- if title:
- ax.set_title(title)
- if ymax:
- ax.set_ylim(0, ymax)
-
- # set x-ticks in date
- # see: http://matplotlib.sourceforge.net/examples/api/date_demo.html
- ax.xaxis.set_major_locator(YearLocator())
- ax.xaxis.set_major_formatter(DateFormatter('\n\n%Y'))
- ax.xaxis.set_minor_locator(MonthLocator())
- ax.xaxis.set_minor_formatter(DateFormatter('%b'))
- # format the coords message box
- ax.format_xdata = DateFormatter('%Y-%m-%d')
- ax.grid(True)
- # pukes with old matplotlib
- #font = FontProperties()
- #font.set_size = 8
- pl.legend(loc='upper left', #prop=font,
- labelspacing=.2, borderaxespad=.2,
- handletextpad=.2, borderpad=.2)
+releases = {
+ 'etch': 'Debian GNU/Linux 4.0 (etch)',
+ 'lenny': 'Debian GNU/Linux 5.0 (lenny)',
+ 'squeeze': 'Debian GNU/Linux 6.0 (squeeze)',
+ 'wheezy': 'Debian GNU/Linux 7.0 (wheezy)',
+ 'jessie': 'Debian testing (jessie)',
+ 'sid': 'Debian unstable (sid)',
+ 'hardy': 'Ubuntu 08.04 LTS "Hardy Heron" (hardy)',
+ 'jaunty': 'Ubuntu 09.04 "Jaunty Jackalope" (jaunty)',
+ 'karmic': 'Ubuntu 09.10 "Karmic Koala" (karmic)',
+ 'lucid': 'Ubuntu 10.04 LTS "Lucid Lynx" (lucid)',
+ 'maverick': 'Ubuntu 10.10 "Maverick Meerkat" (maverick)',
+ 'natty': 'Ubuntu 11.04 "Natty Narwhal" (natty)',
+ 'oneiric': 'Ubuntu 11.10 "Oneiric Ocelot" (oneiric)',
+ 'precise': 'Ubuntu 12.04 LTS "Precise Pangolin" (precise)',
+ 'quantal': 'Ubuntu 12.10 "Quantal Quetzal" (quantal)',
+ 'raring': 'Ubuntu 13.04 "Raring Ringtail" (raring)',
+ 'saucy': 'Ubuntu 13.10 "Saucy Salamander" (saucy)',
+}
if __name__ == '__main__':
- if not len(sys.argv) > 1:
- print 'Need output filename.'
- sys.exit(1)
- cfg_path="/home/neurodebian/neurodebian.git/neurodebian.cfg"
- cfg = SafeConfigParser()
- cfg.read(cfg_path)
- data = []
- for line in fileinput.FileInput(sys.argv[2:], openhook=fileinput.hook_compressed):
- date, list_, ip = line.split(';')
+ data = {}
+ # get the IP, date and target release
+ # the timestamp is cut at the first ':' -> day/month/year, time of day is dropped
+ listget = re.compile(r'^([0-9.:]*) .*\[([^:]*).*GET /lists/([a-z]*)')
+ for line in fileinput.FileInput(openhook=fileinput.hook_compressed):
+ match = listget.match(line)
+ if not match:
+ continue
+ addr, date, release = match.groups()
+ if not release in releases:
+ # ignore fantasy names
+ continue
+ date = datetime.strptime(date, '%d/%b/%Y')
+ # bin into ~weekly slots per month: day / 7 * 7 + 1 (integer division) yields 1, 8, 15, 22 or 29
try:
- suite, loc = list_.split('.')
+ date = datetime(date.year, date.month, date.day / 7 * 7 + 1)
except ValueError:
- suite = list_
- loc = ''
- date = datetime.strptime(date, "%d %b %Y")
- data.append((ip.strip(), loc, suite, date2num(date)))
- data = np.array(data, dtype=dt)
- make_figure(data, ymax=18).savefig(sys.argv[1], bbox_inches='tight', dpi=60)
+ # day 29 does not exist: only happens for Feb 28 in non-leap years, fall back to day 28
+ date = datetime(date.year, date.month, date.day / 7 * 7)
+ # milliseconds since epoch (mktime() returns seconds, interpreted as local time)
+ date = int(time.mktime(date.timetuple()) * 1000)
+ rstats = data.setdefault(releases[release], {})
+ rtime = rstats.setdefault(date, 0)
+ rtime += 1
+ rstats[date] = rtime
+ data[releases[release]] = rstats
+ # determine the union of all timestamps
+ timestamps = sets.Set()
+ for codename, stats in data.iteritems():
+ timestamps.union_update(stats.keys())
+ export = [{'key': release,
+ 'values': [[ts, float(data[release].setdefault(ts, 0)) / 7]
+ for ts in sorted(timestamps)]}
+ for release in sorted(data)]
+ print json.dumps(export)