From 86b119c8e1f8d3c0f42c0229d1e417e9218499ab Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Sat, 27 Apr 2013 22:14:34 -0400
Subject: [PATCH] initial changes (unfinished) for verifymirrors

---
 neurodebian.cfg        | 14 ++++++++
 tools/nd_verifymirrors | 62 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
 create mode 100755 tools/nd_verifymirrors

diff --git a/neurodebian.cfg b/neurodebian.cfg
index ca78e58..b0a7ea3 100644
--- a/neurodebian.cfg
+++ b/neurodebian.cfg
@@ -22,6 +22,20 @@ us-ca = USA-CA (Paul Ivanov, California)
 us-nh = USA-NH (Dartmouth College)
 us-tn = USA-TN (Vanderbilt)
 
+[mirrors monitor]
+# stamp file
+stampfile = .timestamp
+# Email to announce problems to
+#email = yoh@onerussian.com
+# seconds to wait for a response from the server before considering
+# the server non-responsive
+#timeout = 30 ; seconds
+# how many hours a mirror may be stale before a warning is emailed
+warn-threshold = 24 ; hours
+# how frequently to email, in hours
+#email-period = 24 ; hours
+
+
 [release files]
 # Release files of all repositories to be contained in the website
 # Package info files for all these releases will be downloaded.
diff --git a/tools/nd_verifymirrors b/tools/nd_verifymirrors
new file mode 100755
index 0000000..909be45
--- /dev/null
+++ b/tools/nd_verifymirrors
@@ -0,0 +1,62 @@
+#!/usr/bin/python
+#emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
+#ex: set sts=4 ts=4 sw=4 noet:
+"""Script to run rudimentary checks of NeuroDebian mirrors and verify that they are in good shape
+"""
+
+from ConfigParser import SafeConfigParser
+from lxml.html import parse
+
+#cfg_path = "/etc/neurodebian/neurodebian.cfg"
+cfg_path = "./neurodebian.cfg"
+main_mirror = 'us-nh'
+
+# read configuration
+cfg = SafeConfigParser()
+cfg.read(cfg_path)
+
+# load information about mirrors
+mirrors = cfg.options('mirrors')
+urls = dict([(x, cfg.get('mirrors', x)) for x in mirrors])
+# every mirror but the main one is considered a slave
+slave_mirrors = [x for x in mirrors if x != main_mirror]
+
+
+def fetch_listing(url):
+    """Traverse the whole mirror tree and obtain a listing of all available files
+
+    TODO: eventually use scrapy, but the stable release carries only 0.8
+    while 0.16 is out... so -- later
+    """
+    print url
+    page = parse(url).getroot()
+
+    # Keep only relative links, i.e. entries below this directory.
+    # Parent rows also carry time/size, but their presentation varies
+    # heavily across mirrors, so let's not rely on them.
+    links = [(l[0].getparent().getparent(),  # row containing the link
+              l[2].endswith('/'),)           # is it a subdirectory?
+             + l
+             for l in page.iterlinks()
+             if (l[1] == 'href' and l[2]
+                 and not (l[2][0] in ('/', '?')
+                          or l[2].startswith(('http://', 'https://', 'mailto:'))))]
+
+    for p, isdir, a, _, name, _ in links:
+        print name
+        if isdir:
+            # recurse into the subdirectory
+            fetch_listing('%s/%s' % (url.rstrip('/'), name))
+
+
+if False:
+    # eventually check every known mirror
+    for m, url in urls.iteritems():
+        print "Mirror %s" % m
+        fetch_listing(url + '/dists/dapper')
+else:
+    # for now exercise only the main mirror
+    fetch_listing(urls[main_mirror] + '/dists/dapper')
+
+# NB: the AU mirror has fancier index pages, so we would need to
+# distill the page further first
-- 
2.39.2
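
P.S. The new [mirrors monitor] section is added here, but since the patch is
unfinished, nothing reads those options yet. Below is a minimal sketch of how
a staleness check could consume them; the check_stale helper and the
assumption that the stamp file holds a plain Unix timestamp are illustrative
only, not anything this patch defines.

    # Hypothetical consumer of the [mirrors monitor] options; the names and
    # the stamp-file format are assumptions -- nothing in the patch reads
    # these options yet.
    import time
    from urllib import urlopen
    from ConfigParser import SafeConfigParser

    cfg = SafeConfigParser()
    cfg.read('./neurodebian.cfg')

    # '.timestamp' per the new config; assumed to contain a Unix timestamp
    stampfile = cfg.get('mirrors monitor', 'stampfile')
    # hours a mirror may lag behind before a warning is due
    warn_threshold = cfg.getint('mirrors monitor', 'warn-threshold')

    def check_stale(mirror_url):
        """Return True if the mirror's stamp file is older than warn-threshold"""
        stamp = int(urlopen('%s/%s' % (mirror_url.rstrip('/'), stampfile))
                    .read().strip())
        return (time.time() - stamp) / 3600. > warn_threshold

If a mirror turns out to be stale, the commented-out email and email-period
options would presumably address and throttle the warning emails -- that part
is left undefined by the patch.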