2 # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
3 # vi: set ft=python sts=4 ts=4 sw=4 et:
5 # Create a figure with the NeuroDebian repo subscription stats from the apache logs
7 # zgrep "GET /lists/[-a-z\.]\+ HTTP" neuro.debian.net-*access.log* | sed -e 's,[^:]*:\([0-9\.]\+\).*\[\(.*\):.*:.*:.*/lists/\(.*\) HTTP.*,\2;\3;\1,' -e 's,/, ,g'
8 # either from files (given after the first argument) or on stdin. Needs the output filename as the first argument
12 from datetime import datetime
17 from matplotlib.dates import date2num, num2date
18 from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
19 from matplotlib.font_manager import FontProperties
20 from ConfigParser import SafeConfigParser
29 def make_figure(data, ymax=None):
30 fig = pl.figure(figsize=(14,3))
31 distros = ('Debian', 'Ubuntu')
32 # Sorting actually seems to be unnecessary on Python 2.7,
33 # which probably returns release codenames in the same order as
34 # in the config file, which is already correct.
35 # But since our server is still on the previous stable release,
36 # let's sort explicitly for now.
37 # A bare 'nd' id (== 'sid') has no numeric suffix and gets 9999
38 sorting_ids = dict([(x[0], len(x[1])>2 and float(x[1][2:]) or 9999)
39 for x in cfg.items('release backport ids')])
40 for idistro, distro in enumerate(distros):
41 ax = fig.add_subplot(1, len(distros), idistro+1)
42 suites = [code for code in cfg.options('release codenames')
43 if cfg.get('release codenames', code).count(distro)]
44 # sort suites according to their backport ids,
45 # in reverse order so that the freshest is on top
46 suites = sorted(suites,
47 cmp=lambda x,y: cmp(sorting_ids[x], sorting_ids[y]),
49 plot_datehist(ax, data, 10, suites, title=distro, ymax=ymax)
54 def plot_datehist(ax, data, bins, suites, title=None, ymax=None):
55 colors=['#ff0088', '#20435C', '#45902C', '#E08720']
60 for i, suite in enumerate(suites):
61 dates = data['date'][data['suite'] == suite]
63 history_length = dates.max() - dates.min()
64 # make approximately monthly bins; smaller bins yield spiky curves
65 # (oldish numpy needs new=True passed to np.histogram for this to work)
66 (hist, bin_edges) = np.histogram(dates, np.ceil(history_length/30.))
69 print dates.min(), num2date(dates.min()), dates.max(), \
70 num2date(dates.max()), history_length
72 if len(bin_edges) < 2:
73 # protect against single data point entries by ignoring them
74 # wouldn't be able to draw a line anyway ;-)
76 width = bin_edges[1] - bin_edges[0]
79 global_y_max = max(max(y), global_y_max)
80 ax.plot(bin_edges[:-1]+(width/2), y,
81 label=suite, color=colors[i%4], linestyle=linestyle[i//4], lw=2)
82 # transparent curve shading
83 ax.fill_between(bin_edges[:-1]+(width/2), 0, hist / width, alpha=0.2,
84 label=suite, color=colors[i%4])
85 # figure out axis limits to avoid whitespace in plots
86 x_max = bin_edges[-2] + width/2
87 x_min = bin_edges[0] + width/2
89 global_x_max = max(x_max, global_x_max)
90 if global_x_min is None or x_min < global_x_min:
93 ax.set_xlim(global_x_min, global_x_max)
94 ax.set_ylabel('New subscriptions [1/day]')
98 # Always leave a significant 5% of headroom for improvement ;-)
99 ymax = global_y_max * 1.05
102 # format the x-ticks as dates (yearly major ticks, bi-monthly minor ticks)
103 # see: http://matplotlib.sourceforge.net/examples/api/date_demo.html
104 ax.xaxis.set_major_locator(YearLocator())
105 ax.xaxis.set_major_formatter(DateFormatter('\n\n%Y'))
106 ax.xaxis.set_minor_locator(MonthLocator(interval=2))
107 ax.xaxis.set_minor_formatter(DateFormatter('%b'))
108 # format the coords message box
109 ax.format_xdata = DateFormatter('%Y-%m-%d')
111 # pukes with old matplotlib
112 #font = FontProperties()
114 pl.legend(loc='upper left', #prop=font,
115 labelspacing=.2, borderaxespad=.2,
116 handletextpad=.2, borderpad=.2)
119 if __name__ == '__main__':
120 if not len(sys.argv) > 1:
121 print 'Need output filename.'
123 cfg_path="/home/neurodebian/neurodebian.git/neurodebian.cfg"
124 #cfg_path="../neurodebian.cfg"
125 cfg = SafeConfigParser()
128 for line in fileinput.FileInput(sys.argv[2:], openhook=fileinput.hook_compressed):
129 date, list_, ip = line.split(';')
131 suite, loc = list_.split('.')
135 date = datetime.strptime(date, "%d %b %Y")
136 data.append((ip.strip(), loc, suite, date2num(date)))
137 data = np.array(data, dtype=dt)
138 make_figure(data).savefig(sys.argv[1], bbox_inches='tight', dpi=60)