import pysvn
import json
from debian_bundle import deb822
+
+# Let's first ensure no guarding (but annoying) warnings
+import warnings
+warnings.simplefilter('ignore', FutureWarning)
+warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
+
+import apt
from ConfigParser import SafeConfigParser
from optparse import OptionParser, Option, OptionGroup, OptionConflictError
import sys
import os
+import copy
import shutil
import urllib2
import urllib
+import codecs
import subprocess
+import time
+import re
# templating
from jinja2 import Environment, PackageLoader
urllib.urlcleanup()
# open cached file
- fh = open(cfilename, 'r')
+ fh = codecs.open(cfilename, 'r', 'utf-8')
return fh
for stanza in deb822.Packages.iter_paragraphs(fh):
if stanza.has_key('Depends'):
pkg = stanza['Depends']
+ elif stanza.has_key('Recommends'):
+ pkg = stanza['Recommends']
elif stanza.has_key('Suggests'):
pkg = stanza['Suggests']
else:
def get_emptydbentry():
+    """Return a fresh per-package db entry: only the 'main' section exists."""
    return {'main': {}}
-def import_blendstask(db, url):
+def import_blendstask(cfg, db, url):
cache = AptListsCache()
fh = cache.get(url)
task_name = None
task_name = st['Task']
task = (blendname, task_name, taskpage_url)
- # do not stop unless we have a description
- if not st.has_key('Pkg-Description'):
- continue
-
if st.has_key('Depends'):
pkg = st['Depends']
+ elif st.has_key('Recommends'):
+ pkg = st['Recommends']
elif st.has_key('Suggests'):
pkg = st['Suggests']
else:
- print 'Warning: Cannot determine name of prospective package ' \
- '... ignoring.'
+# print 'Warning: Cannot determine name of prospective package ' \
+# '... ignoring. Dump follows:'
+# print st
continue
- if not db.has_key(pkg):
- print 'Ignoring blend package "%s"' % pkg
- continue
-
- info = {}
+ # take care of pkg lists
+ for p in pkg.split(', '):
+ if not db.has_key(p):
+ print 'Ignoring blend package "%s"' % p
+ continue
- # blends info
- info['tasks'] = [task]
- if st.has_key('License'):
- info['license'] = st['License']
- if st.has_key('Responsible'):
- info['responsible'] = st['Responsible']
-
- # pkg description
- descr = st['Pkg-Description'].replace('%', '%%').split('\n')
- info['description'] = descr[0].strip()
- info['long_description'] = ' '.join([l.strip() for l in descr[1:]])
-
- # charge the basic property set
- db[pkg]['main']['description'] = info['description']
- db[pkg]['main']['long_description'] = info['long_description']
- if st.has_key('WNPP'):
- db[pkg]['main']['debian_itp'] = st['WNPP']
- if st.has_key('Pkg-URL'):
- db[pkg]['main']['other_pkg'] = st['Pkg-URL']
- if st.has_key('Homepage'):
- db[pkg]['main']['homepage'] = st['Homepage']
-
- # only store if there isn't something already
- if not db[pkg].has_key('blends'):
- db[pkg]['blends'] = info
- else:
- # just add this tasks name and id
- db[pkg]['blends']['tasks'].append(task)
+ info = {}
+
+ # blends info
+ info['tasks'] = [task]
+ if st.has_key('License'):
+ info['license'] = st['License']
+ if st.has_key('Responsible'):
+ info['responsible'] = st['Responsible']
+
+ # pkg description
+ if st.has_key('Pkg-Description'):
+ descr = st['Pkg-Description'].split('\n')
+ info['description'] = descr[0].strip()
+ info['long_description'] = \
+ u'\n'.join(descr[1:])
+
+ # charge the basic property set
+ db[p]['main']['description'] = info['description']
+ db[p]['main']['long_description'] = info['long_description']
+ if st.has_key('WNPP'):
+ db[p]['main']['debian_itp'] = st['WNPP']
+ if st.has_key('Pkg-URL'):
+ db[p]['main']['other_pkg'] = st['Pkg-URL']
+ if st.has_key('Homepage'):
+ db[p]['main']['homepage'] = st['Homepage']
+
+ # Publications
+ if st.has_key('Published-Title'):
+ title = st['Published-Title']
+ if title[-1] == '.':
+                    # strip trailing dot -- added later
+ pub = {'title': title[:-1]}
+ else:
+ pub = {'title': title}
+ if st.has_key('Published-Authors'):
+ pub['authors'] = st['Published-Authors']
+ if st.has_key('Published-Year'):
+ pub['year'] = st['Published-Year']
+ if st.has_key('Published-In'):
+ pub['in'] = st['Published-In']
+ if st.has_key('Published-URL'):
+ pub['url'] = st['Published-URL']
+ if st.has_key('Published-DOI'):
+ pub['doi'] = st['Published-DOI']
+ # need at least one URL
+ if not pub.has_key('url'):
+ pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']
+
+ db[p]['main']['publication'] = pub
+
+ # Registration
+ if st.has_key('Registration'):
+ db[p]['main']['registration'] = st['Registration']
+
+ # Remarks
+ if st.has_key('Remark'):
+ # prepend a single space to make it look like a long description
+ info['remark'] = convert_longdescr(' ' + st['Remark'])
+
+ # only store if there isn't something already
+ if not db[p].has_key('blends'):
+ db[p]['blends'] = info
+ else:
+ # just add this tasks name and id
+ db[p]['blends']['tasks'].append(task)
+
+ # handle pkg name aliases
+ if p in cfg.options('blend package aliases'):
+ src_entry = db[p].copy()
+ # remove original entry
+ del db[p]
+ # copy the entry into all aliases
+ for alias in cfg.get('blend package aliases', p).split():
+ print "Aliasing %s to %s" % (p, alias)
+ db[alias] = copy.deepcopy(src_entry)
return db
info['version'] = st['Version']
# origin
- info['drc'] = '%s %s %s' % (origin, codename, component)
+ info['distribution'] = origin
+ info['release'] = codename
+ info['component'] = component
# pool url
info['poolurl'] = '/'.join([os.path.dirname(st['Filename'])])
# pkg description
descr = st['Description'].replace('%', '%%').split('\n')
info['description'] = descr[0].strip()
- info['long_description'] = ' '.join([l.strip() for l in descr[1:]])
+ info['long_description'] = u'\n'.join(descr[1:])
db[pkg][distkey] = info
# charge the basic property set
db[pkg]['main']['description'] = info['description']
db[pkg]['main']['long_description'] = info['long_description']
+ if st.has_key('Source'):
+ db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
+ else:
+ db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
if st.has_key('Homepage'):
db[pkg]['main']['homepage'] = st['Homepage']
+ if st.has_key('Recommends'):
+ db[pkg]['main']['recommends'] = st['Recommends']
return db
os.mkdir(p)
-def dde_get(url):
+def dde_get(url, fail=False):
+    """Query a DDE JSON endpoint and return the decoded 'r' payload.
+
+    Returns False on any handled failure.  On transient network errors
+    the request is retried once after 30 seconds; `fail` marks that
+    retry so a second failure is treated as permanent.
+    """
+    # enforce delay to be friendly to DDE
+    time.sleep(3)
+    try:
+        data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
+        print "SUCCESS:", url
+        return data
+    except urllib2.HTTPError, e:
+        # server replied, but has no usable info for this query
+        print "NOINFO:", url, type(e)
+        return False
+    except urllib2.URLError, e:
+        print "URLERROR:", url, type(e)
+        if fail:
+            print "Permanent failure"
+            return False
+        print "Try again after 30 seconds..."
+        time.sleep(30)
+        return dde_get(url, fail=True)
+    except (StopIteration):
+        print "NOINFO:", url
+        return False
+    except json.ReadException, e:
+        # JSON could not be decoded -- the service is probably down
+        print "UDD-DOWN?:", url, type(e)
+        return False
+
+
+def nitrc_get(spec, fail=False):
+    """Fetch the NITRC project-export JSON for project `spec`.
+
+    Returns the decoded data, or False when no info is available.
+    Retries once after 30 seconds on transient network errors; `fail`
+    marks the retry so a second failure is treated as permanent.
+    """
+    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    try:
-        return json.read(urllib2.urlopen(url+"?t=json").read())['r']
-    except urllib2.HTTPError:
+        # change into this from python 2.6 on
+        #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
+        data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
+        print "NITRC-SUCCESS:", spec
+    except urllib2.HTTPError, e:
+        print "NITRC-NOINFO:", spec, type(e)
        return False
+    except urllib2.URLError, e:
+        print "NITRC-URLERROR:", spec, type(e)
+        if fail:
+            print "Permanent failure"
+            return False
+        print "Try again after 30 seconds..."
+        time.sleep(30)
+        return nitrc_get(spec, fail=True)
+    return data
+
+
+def parse_nitrc(data):
+    """Condense a NITRC project-export structure into a small dict.
+
+    Returns None when `data` is False (fetch failed); otherwise a dict
+    with the project 'id' and a total 'downloads' count summed over all
+    files of all releases of all packages of the (single) project.
+    """
+    if data is False:
+        return None
+    # simplify -- there is only one project in the data
+    project = data['projects'][0]
+    nitrc_filtered = {'downloads': 0,
+                      'id': project['id']}
+    # accumulate download counts across every file in the project
+    for pkg in project['packages']:
+        for release in pkg['releases']:
+            for file in release['files']:
+                nitrc_filtered['downloads'] += file['download_count']
+    return nitrc_filtered
+
+
+def import_nitrc(cfg, db):
+    """Attach NITRC download statistics to packages with a configured id.
+
+    Packages are matched via the '[nitrc ids]' config section; packages
+    without an id are left untouched.  Returns the (modified) db.
+    """
+    for p in db.keys():
+        if not cfg.has_option("nitrc ids", p):
+            continue
+        nitrc_spec = cfg.get("nitrc ids", p)
+        nitrc_data = nitrc_get(nitrc_spec)
+        nitrc_excerpt = parse_nitrc(nitrc_data)
+        if not nitrc_excerpt is None:
+            db[p]['nitrc'] = nitrc_excerpt
+    return db
def import_dde(cfg, db):
+    """Merge package information from DDE/UDD into `db`.
+
+    For each package: fetch the freshest generic record, record popcon
+    data for both Debian and Ubuntu, then collect per-release details
+    (version, architectures) from UDD.  Returns the (modified) db.
+    """
-    dists = cfg.get('dde', 'dists').split()
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
-        q = dde_get(query_url + "/all/%s" % p)
+        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
-            db[p]['main'] = q
-        for d in dists:
-            q = dde_get(query_url + "/prio-%s/%s" % (d, p))
-            if q:
-                db[p][(trans_codename(d.split('-')[1], cfg),d)] = q
+            # copy all stuff, while preserving non-overlapping information
+            for k, v in q.iteritems():
+                db[p]['main'][k] = v
+            # get latest popcon info for debian and ubuntu
+            # cannot use origin field itself, since it is none for a few
+            # packages, e.g. python-nifti
+            origin = q['drc'].split()[0]
+            if origin == 'ubuntu':
+                if q.has_key('popcon'):
+                    db[p]['main']['ubuntu_popcon'] = q['popcon']
+                # if we have ubuntu, need to get debian
+                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
+                if q and q.has_key('popcon'):
+                    db[p]['main']['debian_popcon'] = q['popcon']
+            elif origin == 'debian':
+                if q.has_key('popcon'):
+                    db[p]['main']['debian_popcon'] = q['popcon']
+                # if we have debian, need to get ubuntu
+                q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
+                if q and q.has_key('popcon'):
+                    db[p]['main']['ubuntu_popcon'] = q['popcon']
+            else:
+                print("Ignoring unknown origin '%s' for package '%s'." \
+                        % (origin, p))
+
+        # now get info for package from all releases in UDD
+        q = dde_get(query_url + "/dist/p:%s" % p)
+        if not q:
+            continue
+        # hold all info about this package per distribution release
+        info = {}
+        for cp in q:
+            distkey = (trans_codename(cp['release'], cfg),
+                       "%s-%s" % (cp['distribution'], cp['release']))
+            if not info.has_key(distkey):
+                info[distkey] = cp
+                # turn into a list to append others later
+                info[distkey]['architecture'] = [info[distkey]['architecture']]
+            # accumulate data over multiple archs
+            else:
+                comp = apt.VersionCompare(cp['version'],
+                                          info[distkey]['version'])
+                # found another arch for the same version
+                if comp == 0:
+                    info[distkey]['architecture'].append(cp['architecture'])
+                # found newer version, dump the old ones
+                elif comp > 0:
+                    info[distkey] = cp
+                    # turn into a list to append others later
+                    info[distkey]['architecture'] = [info[distkey]['architecture']]
+                # simply ignore older versions
+                else:
+                    pass
+
+        # finally assign the new package data
+        for k, v in info.iteritems():
+            db[p][k] = v
    return db
+def assure_unicode(s):
+    """Assure that the argument is returned as a unicode object.
+
-def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
+    Necessary if strings are not carrying the Pythonish 'u' prefix to
+    signal UTF-8 strings, but are in fact UTF-8.
+    """
+    if type(s) is unicode:
+        return s
+    elif type(s) is str:
+        # attempt regular unicode call and if fails -- just decode it
+        # into utf8
+        try:
+            return unicode(s)
+        except UnicodeDecodeError, e:
+            return s.decode('utf8')
+    else:
+        # non-string input: stringify first, then recurse
+        return assure_unicode(str(s))
+
+
+def convert_longdescr(ld):
+    """Convert a Debian-style long description into ReST-friendly text.
+
+    Paragraph separators (' .') and embedded list bullets become real
+    blank lines via a temporary marker; ReST active characters are
+    backslash-escaped at the end.
+    """
+    ld = ld.replace('% ', '%% ')
+    ld = ld.split('\n')
+    for i, l in enumerate(ld):
+        if l == ' .':
+            ld[i] = ' #NEWLINEMARKER#'
+        # look for embedded lists
+        elif len(l) >=3 and l[:2] == ' ' and l[2] in '-*':
+            ld[i] = ' #NEWLINEMARKER# ' + l[2:]
+
+    ld = u' '.join([l[1:] for l in ld])
+    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
+    # cleanup any leftover (e.g. trailing markers)
+    ld = ld.replace('#NEWLINEMARKER#', '')
+    # safe-guard ReST active symbols
+    ld = re.sub(r'([\'`*])', r'\\\1', ld)
+    return ld
+
+
+def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
+ print pkg
# local binding for ease of use
- db = db[pkg]
+ pkgdb = db[pkg]
# do nothing if there is not at least the very basic stuff
- if not db['main'].has_key('description'):
+ if not pkgdb['main'].has_key('description'):
return
- title = '**%s** -- %s' % (pkg, db['main']['description'])
+ title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
underline = '*' * (len(title) + 2)
title = '%s\n %s\n%s' % (underline, title, underline)
- # preprocess long description
- ld = db['main']['long_description']
- ld = ' '.join([l.lstrip(' .') for l in ld.split('\n')])
-
- page = template.render(pkg=pkg,
- title=title,
- long_description=ld,
- cfg=cfg,
- db=db)
+ ex_dir = None
+ if 'sv' in pkgdb['main']:
+ ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
+ if not os.path.exists(ex_dir):
+ ex_dir = None
+ page = template.render(
+ pkg=pkg,
+ title=title,
+ long_description=convert_longdescr(
+ assure_unicode(pkgdb['main']['long_description'])),
+ cfg=cfg,
+ db=pkgdb,
+ fulldb=db,
+ extracts_dir=ex_dir,
+ op=os.path)
# the following can be replaced by something like
# {% include "sidebar.html" ignore missing %}
# in the template whenever jinja 2.2 becomes available
def store_db(db, filename):
+    """Dump the package db to `filename` as a pretty-printed repr (UTF-8)."""
    pp = PrettyPrinter(indent=2)
-    f = open(filename, 'w')
+    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()
def read_db(filename):
+    """Load a db previously written by store_db().
+
+    NOTE(review): eval() executes arbitrary code from the file -- only
+    safe because the file is produced locally by store_db(); never feed
+    it untrusted input.
+    """
-    f = open(filename)
+    f = codecs.open(filename, 'r', 'utf-8')
    db = eval(f.read())
    return db
def write_sourceslist(jinja_env, cfg, outdir):
+    """Generate APT sources.list snippets and the sources_lists page.
+
+    One list file per (release, mirror) pair is written under
+    <outdir>/lists; each file enables the 'data' archive plus the
+    release archive of the given mirror.
+    """
    create_dir(outdir)
-    create_dir(os.path.join(outdir, '_static'))
+    create_dir(os.path.join(outdir, 'lists'))
    repos = {}
    for release in cfg.options('release codenames'):
+        if release == 'data':
+            # no separate list for the data archive
+            continue
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
-            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
+            listname = '%s.%s' % (release, mirror)
            repos[transrel].append((mirror, listname))
-            lf = open(os.path.join(outdir, '_static', listname), 'w')
-            aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
-                                                      release)
-            lf.write('deb %s' % aptcfg)
-            lf.write('deb-src %s' % aptcfg)
+            lf = open(os.path.join(outdir, 'lists', listname), 'w')
+            for rel in ('data', release):
+                aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
+                                                          rel)
+                lf.write('deb %s' % aptcfg)
+                lf.write('#deb-src %s' % aptcfg)
            lf.close()
+    # NOTE(review): `repos` is still built above but no longer passed to
+    # the template render below -- presumably obsolete; confirm before
+    # removing.
+    id2codename = dict([(cfg.get('release backport ids', r), r)
+                        for r in cfg.options('release codenames')])
+    id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
+                       for r in cfg.options('release codenames')])
+    mirror2name = dict([(m, cfg.get('mirror names', m))
+                        for m in cfg.options('mirrors')])
+    mirror2url = dict([(m, cfg.get('mirrors', m))
+                       for m in cfg.options('mirrors')])
    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
-    sl.write(srclist_template.render(repos=repos))
+    sl.write(srclist_template.render(id2codename=id2codename,
+                                     id2relname=id2relname,
+                                     mirror2name=mirror2name,
+                                     mirror2url=mirror2url))
    sl.close()
-def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
+def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
+    """Render the package TOC, the dataset TOC and one page per package.
+
+    All files are written UTF-8 encoded below `outdir`; packages for
+    which generate_pkgpage() yields no page are skipped.
+    """
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))
    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
-    toc = open(os.path.join(outdir, 'pkgs.rst'), 'w')
-    toc.write(toc_template.render(pkgs=db.keys()))
+    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
+    # this is a fragile test
+    toc.write(toc_template.render(
+        pkgs=[k for k in db.keys()
+                if not ('Datasets (data)', 'neurodebian-data') in db[k]]))
    toc.close()
+    # and now only for dataset packages
+    toc_template = jinja_env.get_template('datasets_toc.rst')
+    toc = codecs.open(os.path.join(outdir, 'datasets.rst'), 'w', 'utf-8')
+    # this is a fragile test
+    toc.write(toc_template.render(
+        pkgs=[k for k in db.keys()
+                if ('Datasets (data)', 'neurodebian-data') in db[k]]))
+    toc.close()
+
    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
-        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir)
+        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
        # when no page is available skip this package
        if page is None:
            continue
-        pf = open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w')
-        pf.write(generate_pkgpage(p, cfg, db, pkg_template, addenum_dir))
+        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
+        pf.write(page)
        pf.close()
op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
type="string", default=None, help="None")
+ op.add_option("--extracts", action="store", dest="extracts_dir",
+ type="string", default=None, help="None")
+
def main():
op = OptionParser(version="%prog 0.0.2")
# get info from task files
if cfg.has_option('packages', 'prospective'):
for url in cfg.get('packages', 'prospective').split():
- db = import_blendstask(db, url)
+ db = import_blendstask(cfg, db, url)
# parse NeuroDebian repository
if cfg.has_option('neurodebian', 'releases'):
# collect package information from DDE
db = import_dde(cfg, db)
+ # get info from NITRC
+ db = import_nitrc(cfg, db)
# store the new DB
store_db(db, opts.db)
# and be done
jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))
# generate package pages and TOC and write them to files
- write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)
+ write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)
write_sourceslist(jinja_env, cfg, opts.outdir)