#!/usr/bin/python
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
#
"""Turn Apache access logs into NeuroDebian subscription statistics.

Access-log lines are read through ``fileinput`` (files named on the command
line, or stdin when none are given; compressed files are opened via
``fileinput.hook_compressed``).  Every ``GET /lists/<release>`` request for a
known release is counted per release and per 7-day bucket of the month, and
the result is printed to stdout as JSON::

    [{"key": "<release label>",
      "values": [[<bucket start, ms since epoch>, <requests per day>], ...]},
     ...]

All releases share the same, sorted list of bucket timestamps; buckets in
which a release was not requested are reported with a rate of 0.
"""

import fileinput
import sys
import time
from datetime import datetime
import re
import json
import operator

# Codename (as it appears in the requested /lists/<codename> URL) mapped to
# the human-readable label used as "key" in the exported JSON.
releases = {
    'etch': 'Debian GNU/Linux 4.0 (etch)',
    'lenny': 'Debian GNU/Linux 5.0 (lenny)',
    'squeeze': 'Debian GNU/Linux 6.0 (squeeze)',
    'wheezy': 'Debian GNU/Linux 7.0 (wheezy)',
    'jessie': 'Debian testing (jessie)',
    'sid': 'Debian unstable (sid)',
    'hardy': 'Ubuntu 08.04 LTS "Hardy Heron" (hardy)',
    'jaunty': 'Ubuntu 09.04 "Jaunty Jackalope" (jaunty)',
    'karmic': 'Ubuntu 09.10 "Karmic Koala" (karmic)',
    'lucid': 'Ubuntu 10.04 LTS "Lucid Lynx" (lucid)',
    'maverick': 'Ubuntu 10.10 "Maverick Meerkat" (maverick)',
    'natty': 'Ubuntu 11.04 "Natty Narwhal" (natty)',
    'oneiric': 'Ubuntu 11.10 "Oneiric Ocelot" (oneiric)',
    'precise': 'Ubuntu 12.04 LTS "Precise Pangolin" (precise)',
    'quantal': 'Ubuntu 12.10 "Quantal Quetzal" (quantal)',
    'raring': 'Ubuntu 13.04 "Raring Ringtail" (raring)',
    'saucy': 'Ubuntu 13.10 "Saucy Salamander" (saucy)',
    'trusty': 'Ubuntu 14.04 "Trusty Tahr" (trusty)',
}

# Captures, in order: client address, date part of the bracketed timestamp
# (everything up to the first ':'), and the leading lowercase run of the
# requested list name.
LISTGET = re.compile(r'^([0-9.:]*) .*\[([^:]*).*GET /lists/([a-z]*)')


def week_start(day):
    """Return midnight of the first day of the 7-day bucket holding *day*.

    Buckets are aligned to the month and start on the 1st, 8th, 15th, 22nd
    and 29th.  The result is therefore always an existing date that is not
    later than *day*: the 29th is only produced for inputs on or after the
    29th of the same month.
    """
    return datetime(day.year, day.month, (day.day - 1) // 7 * 7 + 1)


def to_millis(moment):
    """Return *moment* as integer milliseconds since the epoch.

    ``time.mktime`` interprets the naive datetime as local time, so the
    value depends on the timezone the script runs in.
    """
    return int(time.mktime(moment.timetuple()) * 1000)


def collect_stats(lines):
    """Count list requests per release label and bucket timestamp.

    :param lines: iterable of access-log lines.
    :returns: ``{release label: {bucket start in ms: request count}}``.

    Lines that do not match ``LISTGET``, that ask for a name missing from
    ``releases``, or whose captured date is not ``%d/%b/%Y`` are ignored.
    """
    data = {}
    for line in lines:
        match = LISTGET.match(line)
        if not match:
            continue
        date_text, release = match.group(2, 3)
        if release not in releases:
            # ignore fantasy names
            continue
        try:
            day = datetime.strptime(date_text, '%d/%b/%Y')
        except ValueError:
            # The regex can latch onto a '[' inside the request itself; a
            # single odd line must not abort the whole run.
            continue
        stamp = to_millis(week_start(day))
        weekly = data.setdefault(releases[release], {})
        weekly[stamp] = weekly.get(stamp, 0) + 1
    return data


def build_export(data):
    """Convert the output of ``collect_stats`` into the exported structure.

    :param data: ``{release label: {bucket start in ms: request count}}``.
    :returns: list of ``{'key': label, 'values': [[timestamp, rate], ...]}``
        sorted by label, each with one entry per timestamp seen for *any*
        release, in ascending order.  *data* is left unmodified.
    """
    # determine the union of all timestamps
    timestamps = set()
    for stats in data.values():
        timestamps.update(stats)
    ordered = sorted(timestamps)
    # Counts are divided by 7 to express them per day.
    # NOTE: the bucket starting on the 29th covers fewer than 7 days, so its
    # rate is an underestimate.
    return [{'key': label,
             'values': [[ts, data[label].get(ts, 0) / 7.0] for ts in ordered]}
            for label in sorted(data)]


def main():
    """Read the logs selected on the command line and print the JSON."""
    lines = fileinput.FileInput(openhook=fileinput.hook_compressed)
    print(json.dumps(build_export(collect_stats(lines))))


if __name__ == '__main__':
    main()