+#!/usr/bin/python
+#emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
+#ex: set sts=4 ts=4 sw=4 noet:
+"""Script to do rudimentary checks of NeuroDebian mirrors to verify they are in good shape
+"""
+
+
+import sys
+from urllib import urlopen
+from ConfigParser import SafeConfigParser
+
+#cfg_path="/etc/neurodebian/neurodebian.cfg"
+cfg_path="./neurodebian.cfg"
+main_mirror='us-nh'
+
+# read configuration
+cfg = SafeConfigParser()
+cfg.read(cfg_path)
+
+# load information about mirrors
+mirrors = cfg.options('mirrors')
+urls = dict([(x, cfg.get('mirrors', x)) for x in mirrors])
+slave_mirrors = mirrors.pop(mirrors.index(main_mirror))
+
+#if True:
+def fetch_listing(url):
+ """Traverses whole website, obtains listing of all files available
+
+
+ TODO: eventually use scrapy, but stable one has only 0.8 while
+ 0.16 is out... so -- later
+ """
+ print url
+ #url = 'http://neuro.debian.net/debian/dists/dapper/'
+ #url = "http://mirror.aarnet.edu.au/pub/neurodebian/dists/dapper/"
+ parser = etree.HTMLParser()
+ from lxml.html import parse, submit_form, fromstring
+ #page = etree.parse(urlopen('http://neuro.debian.net/debian/dists/dapper/'), parser)
+ #page = objectify.parse(urlopen('http://neuro.debian.net/debian/dists/dapper/'), parser)
+ page = parse(url).getroot()
+
+ #page = fromstring(''.join(urlopen(url).readlines()))
+ #page.make_links_absolute(url)
+
+
+ # go through all rows with links
+ rows = [row for row in page.iter('tr')]
+ res = {}
+ for row in rows:
+ pass
+
+ # do I need parent actually for anything? yes -- time/size
+ # actually -- of no use since presence/presentation heavily varies
+ # across mirrors, so let's not rely on them
+ links = [ (l[0].getparent().getparent(),
+ l[2].endswith('/'),) +
+ l
+ for l in page.iterlinks()
+ if (l[1] == 'href'
+ and not (
+ l[2][0] in ('/', '?')
+ or l[2].startswith('http://')
+ or l[2].startswith('mailto:')
+ )) ]
+
+ for p, isdir, a, _, name, _ in links:
+ print name
+ if isdir:
+ fetch_listing('%s/%s' %
+ (url, name))
+
+if False:
+ for m, url in urls.iteritems():
+ print "Mirror %s" % m
+ fetch_listing(url + '/dists/dapper')
+else:
+ fetch_listing(urls[main_mirror] + '/dists/dapper')
+"""
+au has fancier index pages, so we would need to distil page first more
+"""