#!/usr/bin/python
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
#
# Create a figure with the NeuroDebian repo subscription stats from the
# apache logs.
#
# Requires the output of
#  zgrep "GET /lists/[-a-z\.]\+ HTTP" neuro.debian.net-*access.log* | sed -e 's,[^:]*:\([0-9\.]\+\).*\[\(.*\):.*:.*:.*/lists/\(.*\) HTTP.*,\2;\3;\1,' -e 's,/, ,g'
# either from files or on stdin.
#
# Usage: the first argument is the output filename; any further arguments
# are input files (opened through fileinput.hook_compressed).  Without input
# files the records are read from stdin.
#
# Every input record has the form "<date>;<list>;<ip>" where <date> is
# formatted as "%d %b %Y" and <list> is "<suite>" or "<suite>.<loc>".

import fileinput
import sys
from datetime import datetime
from math import ceil
from ConfigParser import SafeConfigParser

import numpy as np
import matplotlib
# the backend has to be selected before pylab gets imported
matplotlib.use('Agg')
import pylab as pl
from matplotlib.dates import date2num, num2date
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
from matplotlib.font_manager import FontProperties

# layout of one parsed log record
dt = [('ip', '|S16'),
      ('loc', '|S3'),
      ('suite', '|S20'),
      ('date', float)]


def make_figure(data, ymax=None):
    """Create the subscription figure with one panel per distribution.

    Reads the module-level ``cfg`` config parser (sections
    'release backport ids' and 'release codenames'), which therefore has to
    be set up before this function is called.

    Parameters
    ----------
    data : structured array with the fields listed in ``dt``
    ymax : upper y-axis limit handed to every panel; if not given each
      panel derives its own limit from its data

    Returns
    -------
    The pylab figure holding one subplot per distribution.
    """
    fig = pl.figure(figsize=(14, 3))
    distros = ('Debian', 'Ubuntu')
    # Sorting is actually seems to be not needed on Python 2.7
    # which probably returns release codenames in the order as
    # in the config file which is already correct
    # But since our server is still on previous stable release
    # let's sort for now explicitly
    # 9999 for 'nd' == 'sid'
    # A conditional expression is used instead of the "and/or" idiom, which
    # would wrongly turn a numeric id of 0 into 9999.
    sorting_ids = dict(
        (name, float(value[2:]) if len(value) > 2 else 9999)
        for name, value in cfg.items('release backport ids'))

    for idistro, distro in enumerate(distros):
        ax = fig.add_subplot(1, len(distros), idistro + 1)
        suites = [code for code in cfg.options('release codenames')
                  if cfg.get('release codenames', code).count(distro)]
        # sort suites according to backport ids
        # and in reverse order so the freshest is on top
        suites = sorted(suites,
                        key=lambda code: sorting_ids[code],
                        reverse=True)
        plot_datehist(ax, data, 10, suites, title=distro, ymax=ymax)
    fig.autofmt_xdate()
    return fig


def plot_datehist(ax, data, bins, suites, title=None, ymax=None):
    """Plot the rate of new subscriptions over time, one curve per suite.

    Parameters
    ----------
    ax : matplotlib axes to draw into
    data : structured array providing at least the fields 'date'
      (matplotlib date numbers) and 'suite'
    bins : unused; the number of bins is derived from the time span covered
      by each suite (kept for interface compatibility)
    suites : sequence of suite names, each gets its own curve
    title : optional axes title
    ymax : optional upper y-axis limit; by default 5% above the highest curve

    Suites without any record, or whose records all fall onto a single date,
    are skipped since no line could be drawn for them.
    """
    colors = ['#ff0088', '#20435C', '#45902C', '#E08720']
    linestyle = ['-', '--']

    global_x_max = None
    global_x_min = None
    global_y_max = None

    for i, suite in enumerate(suites):
        dates = data['date'][data['suite'] == suite]
        if len(dates) == 0:
            # nothing logged for this suite, min()/max() would fail
            continue
        # history in days
        history_length = dates.max() - dates.min()
        # make approx monthly bins, smaller bins yield spiky curves
        # numpy wants a positive integer number of bins
        n_bins = int(ceil(history_length / 30.))
        if n_bins < 1:
            # protect against single data point entries by ignoring them
            # wouldn't be able to draw a line anyway ;-)
            continue
        # needs new=True to work with oldish numpy
        (hist, bin_edges) = np.histogram(dates, n_bins)

        width = bin_edges[1] - bin_edges[0]
        centers = bin_edges[:-1] + (width / 2)
        # subscriptions per day
        y = hist / width

        # cycle through colors first, then through line styles; wrap around
        # so that any number of suites can be drawn
        color = colors[i % len(colors)]
        style = linestyle[(i // len(colors)) % len(linestyle)]
        # think lines
        ax.plot(centers, y, label=suite, color=color, linestyle=style, lw=2)
        # transparent curve shading; not labeled, the line already provides
        # the legend entry for this suite
        ax.fill_between(centers, 0, y, alpha=0.2, color=color)

        # figure out axis limits to avoid whitespace in plots
        y_peak = y.max()
        if global_y_max is None or y_peak > global_y_max:
            global_y_max = y_peak
        if global_x_max is None or centers[-1] > global_x_max:
            global_x_max = centers[-1]
        if global_x_min is None or centers[0] < global_x_min:
            global_x_min = centers[0]

    if global_x_min is not None:
        ax.set_xlim(global_x_min, global_x_max)
    ax.set_ylabel('New subscriptions [1/day]')
    if title:
        ax.set_title(title)
    if not ymax and global_y_max is not None:
        # Always leave significant 5% for improvement ;-)
        ymax = global_y_max * 1.05
    if ymax:
        # stays on automatic limits if nothing was plotted
        ax.set_ylim(0, ymax)

    # set x-ticks in date
    # see: http://matplotlib.sourceforge.net/examples/api/date_demo.html
    ax.xaxis.set_major_locator(YearLocator())
    ax.xaxis.set_major_formatter(DateFormatter('\n\n%Y'))
    ax.xaxis.set_minor_locator(MonthLocator(interval=2))
    ax.xaxis.set_minor_formatter(DateFormatter('%b'))
    # format the coords message box
    ax.format_xdata = DateFormatter('%Y-%m-%d')
    ax.grid(True)

    # No custom legend font (prop=FontProperties instance) on purpose:
    # it pukes with old matplotlib.
    # Attach the legend to the axes we were given, not to whatever axes
    # happens to be the current one.
    ax.legend(loc='upper left',
              labelspacing=.2, borderaxespad=.2,
              handletextpad=.2, borderpad=.2)


if __name__ == '__main__':
    if not len(sys.argv) > 1:
        print('Need output filename.')
        sys.exit(1)

    cfg_path = "/home/neurodebian/neurodebian.git/neurodebian.cfg"
    # for a local checkout use instead:
    #cfg_path="../neurodebian.cfg"
    # make_figure() reads this module-level parser
    cfg = SafeConfigParser()
    cfg.read(cfg_path)

    data = []
    for line in fileinput.FileInput(sys.argv[2:],
                                    openhook=fileinput.hook_compressed):
        line = line.strip()
        if not line:
            # tolerate blank lines, e.g. at the end of the input
            continue
        date, list_, ip = line.split(';')
        try:
            suite, loc = list_.split('.')
        except ValueError:
            # list name without a location part
            suite = list_
            loc = ''
        date = datetime.strptime(date, "%d %b %Y")
        data.append((ip.strip(), loc, suite, date2num(date)))
    data = np.array(data, dtype=dt)

    make_figure(data).savefig(sys.argv[1], bbox_inches='tight', dpi=60)