#!/usr/bin/python
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
#
"""Count ``GET /lists/<release>`` requests per release and per week.

Log lines are read through ``fileinput`` (files named on the command line,
or stdin when none are given; compressed files are opened via
``fileinput.hook_compressed``).  The result is printed to stdout as a JSON
list with one ``{'key': <release label>, 'values': [[timestamp, rate], ...]}``
entry per release.
"""
import fileinput
import sys
import time
from datetime import datetime
import re
import json
import operator

# Known release codenames mapped to the label used as 'key' in the output.
# Requests for any other codename are ignored.
releases = {
    'etch': 'Debian GNU/Linux 4.0 (etch)',
    'lenny': 'Debian GNU/Linux 5.0 (lenny)',
    'squeeze': 'Debian GNU/Linux 6.0 (squeeze)',
    'wheezy': 'Debian GNU/Linux 7.0 (wheezy)',
    'jessie': 'Debian testing (jessie)',
    'sid': 'Debian unstable (sid)',
    'hardy': 'Ubuntu 08.04 LTS "Hardy Heron" (hardy)',
    'jaunty': 'Ubuntu 09.04 "Jaunty Jackalope" (jaunty)',
    'karmic': 'Ubuntu 09.10 "Karmic Koala" (karmic)',
    'lucid': 'Ubuntu 10.04 LTS "Lucid Lynx" (lucid)',
    'maverick': 'Ubuntu 10.10 "Maverick Meerkat" (maverick)',
    'natty': 'Ubuntu 11.04 "Natty Narwhal" (natty)',
    'oneiric': 'Ubuntu 11.10 "Oneiric Ocelot" (oneiric)',
    'precise': 'Ubuntu 12.04 LTS "Precise Pangolin" (precise)',
    'quantal': 'Ubuntu 12.10 "Quantal Quetzal" (quantal)',
    'raring': 'Ubuntu 13.04 "Raring Ringtail" (raring)',
    'saucy': 'Ubuntu 13.10 "Saucy Salamander" (saucy)',
    'trusty': 'Ubuntu 14.04 "Trusty Tahr" (trusty)',
}

# Captures, in order: the client address, the request date (the text between
# '[' and the first ':', i.e. without the time of day) and the release
# codename that follows "GET /lists/".
LIST_REQUEST_RE = re.compile(
    r'^([0-9.:]*) .*\[([^:]*).*GET /lists/([a-z]*)')


def week_start(date):
    """Return midnight of the first day of the 7-day bucket holding *date*.

    Buckets are aligned to the month and start on days 1, 8, 15, 22 and 29,
    so the returned day never lies after *date* and always exists in that
    month (day 29 is only produced for dates on or after the 29th).
    """
    return datetime(date.year, date.month, (date.day - 1) // 7 * 7 + 1)


def count_requests(lines):
    """Tally list requests per release label and weekly bucket.

    *lines* is any iterable of log lines (``str`` or ``bytes``).  Lines that
    do not match ``LIST_REQUEST_RE`` or that name an unknown release are
    skipped.

    Returns a dict ``{release label: {bucket timestamp: request count}}``
    where the timestamp is the bucket start in milliseconds since the epoch
    (local time, as computed by ``time.mktime``).

    Raises ``ValueError`` if a matching line carries a date that is not in
    ``%d/%b/%Y`` format.
    """
    data = {}
    for line in lines:
        if isinstance(line, bytes):
            # hook_compressed hands out bytes for .gz/.bz2 files on
            # Python 3 before 3.10; the pattern is a text pattern.
            line = line.decode('utf-8', 'replace')
        match = LIST_REQUEST_RE.match(line)
        if not match:
            continue
        date_text, release = match.group(2, 3)
        if release not in releases:
            # ignore fantasy names
            continue
        bucket = week_start(datetime.strptime(date_text, '%d/%b/%Y'))
        # milliseconds since epoch
        stamp = int(time.mktime(bucket.timetuple()) * 1000)
        stats = data.setdefault(releases[release], {})
        stats[stamp] = stats.get(stamp, 0) + 1
    return data


def build_export(data):
    """Turn the tallies from ``count_requests`` into the output structure.

    Every release gets one ``[timestamp, rate]`` pair for each timestamp seen
    for *any* release (missing buckets count as 0), in ascending timestamp
    order.  ``rate`` is the bucket's request count divided by 7, i.e. a
    per-day average for a full 7-day bucket.  Releases are sorted by label.
    *data* is not modified.
    """
    # determine the union of all timestamps
    timestamps = set()
    for stats in data.values():
        timestamps.update(stats)
    ordered = sorted(timestamps)
    return [{'key': release,
             'values': [[ts, data[release].get(ts, 0) / 7.0]
                        for ts in ordered]}
            for release in sorted(data)]


def main():
    """Read the logs via ``fileinput`` and print the JSON export."""
    lines = fileinput.FileInput(openhook=fileinput.hook_compressed)
    print(json.dumps(build_export(count_requests(lines))))


if __name__ == '__main__':
    main()