import pysvn
import json
-from debian_bundle import deb822
+import numpy as np
+
+from ConfigParser import SafeConfigParser
+from optparse import OptionParser, OptionGroup, OptionConflictError
# Lets first assure no guarding (but annoying) warnings
import warnings
warnings.simplefilter('ignore', FutureWarning)
-warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
+warnings.filterwarnings('ignore',
+ 'Module debian_bundle was already imported.*', UserWarning)
+
+from debian import deb822
+import apt # initializes the "_system" ;)
+from apt_pkg import version_compare
-import apt
-from ConfigParser import SafeConfigParser
-from optparse import OptionParser, Option, OptionGroup, OptionConflictError
import sys
import os
import copy
import subprocess
import time
import re
+
# templating
from jinja2 import Environment, PackageLoader
# enforce delay to be friendly to DDE
time.sleep(3)
try:
- data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
+ data = json.load(urllib2.urlopen(url+"?t=json"))['r']
print "SUCCESS:", url
return data
except urllib2.HTTPError, e:
except (StopIteration):
print "NOINFO:", url
return False
- except json.ReadException, e:
+ except Exception, e:
print "UDD-DOWN?:", url, type(e)
return False
try:
# change into this from python 2.6 on
#data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
- data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
+ data = json.load(urllib2.urlopen(nitrc_url + '?spec=%s' % spec))
print "NITRC-SUCCESS:", spec
except urllib2.HTTPError, e:
print "NITRC-NOINFO:", spec, type(e)
if q.has_key('popcon'):
db[p]['main']['debian_popcon'] = q['popcon']
# if we have debian, need to get ubuntu
- q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
+ q = dde_get(query_url + "/packages/prio-ubuntu-precise/%s" % p)
if q and q.has_key('popcon'):
db[p]['main']['ubuntu_popcon'] = q['popcon']
else:
info[distkey]['architecture'] = [info[distkey]['architecture']]
# accumulate data for multiple over archs
else:
- comp = apt.VersionCompare(cp['version'],
- info[distkey]['version'])
+ comp = version_compare(cp['version'],
+ info[distkey]['version'])
# found another arch for the same version
if comp == 0:
info[distkey]['architecture'].append(cp['architecture'])
def convert_longdescr(ld):
+ """
+
+ yoh: I think all this long description conversion will keep giving
+ us problems since per se there is no strict regulations,
+ especially in blends files
+ """
+ descr = u''
ld = ld.replace('% ', '%% ')
+ ld = ld.replace(r'\t', ' ') # just in case assuming tab 4
ld = ld.split('\n')
- for i, l in enumerate(ld):
- if l == ' .':
- ld[i] = ' #NEWLINEMARKER#'
- # look for embedded lists
- elif len(l) >=3 and l[:2] == ' ' and l[2] in '-*':
- ld[i] = ' #NEWLINEMARKER# ' + l[2:]
-
- ld = u' '.join([l[1:] for l in ld])
- ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
- # cleanup any leftover (e.g. trailing markers)
- ld = ld.replace('#NEWLINEMARKER#', '')
- # safe-guard ReST active symbols
- ld = re.sub(r'([\'`*])', r'\\\1', ld)
- return ld
+    re_leadblanks = re.compile("^ *")
+    # BUG FIX: '[o*-+]' contains the range '*-+' (chr 42-43) and therefore
+    # never matched '-' itemization; use '[-o*+]' with the dash first,
+    # consistent with re_itemized_gr below
+    re_itemized = re.compile("^[-o*+] +")
+    re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$")
+    re_description_gr = re.compile("^( *[^-]+ - )(.*?)$")
+
+ def unwrap_lines(lines):
+ out = []
+ indent_levels = [-1]
+ for l in lines:
+ match = re_itemized_gr.search(l).groups()
+ if ((len(match[0]) in indent_levels and match[1] is None)
+ or (len(match[0]) > max(indent_levels)+4)) \
+ and match[2].strip() != '.':
+ # append to previous
+ if not out[-1].endswith(" "):
+ out[-1] += " "
+ out[-1] += match[2]
+ else:
+ out.append(l)
+
+ indent_levels = [len(match[0])]
+ if match[1] is not None:
+ indent_levels += [len(match[0]) + len(match[1])]
+ if match[2].strip() == '.':
+ # reset though if '.'
+ indent_levels = [-1]
+ return out
+
+ def dedent_withlevel(lines):
+ """Dedent `lines` given in a list provide dedented lines and how much was dedented
+ """
+ nleading = min([re_leadblanks.search(l).span()[1]
+ for l in lines])
+ return [l[nleading:] for l in lines], nleading
+
+ def block_lines(ld, level=0):
+ # so we got list of lines
+ # dedent all of them first
+ ld, level = dedent_withlevel(ld)
+
+ # lets collect them in blocks/paragraphs
+ # 1. into paragraphs split by '.'
+ blocks, block = [], None
+
+ # next block can begin if
+ # 1. . line
+ # 2. it was an itemized list and all items begin with
+ # the same symbol or get further indented accordingly
+ # so let's first check if it is an itemized list
+ itemized_match = re_itemized.search(ld[0])
+ if itemized_match:
+ allow_indents = " "*itemized_match.span()[1]
+ else:
+ allow_indents = None
+ for l in ld:
+ if block is None or l.strip() == '.' \
+ or (len(l) and ( len(block) and (
+ (l.startswith(' ') and not block[-1].startswith(' '))
+ or
+ (not l.startswith(' ') and block[-1].startswith(' '))))):
+ block = []
+ blocks.append(block)
+ if l.strip() != '.':
+ block.append(l)
+ if len(blocks) == 1:
+ return blocks[0]
+ else:
+ return [block_lines(b, level+1) for b in blocks if len(b)]
+
+ def blocks_to_rst(bls, level=0):
+ # check if this block is an itemized beast
+ #itemized_match = re_itemized_gr.search(bls[0][0])
+ #if itemized_match:
+ # res += ' 'allow_indents = " "*itemized_match.span()[1]
+ out = ''
+ for b in bls:
+ if isinstance(b, list):
+ if len(b) == 1:
+ out += " "*level + b[0] + '\n\n'
+ else:
+ out += blocks_to_rst(b, level+1)
+ else:
+ e = " "*level + b + '\n'
+ if not re_itemized.search(b):
+ pass
+ #e += '\n'
+ elif len(e) and e[0] == ' ':
+ # strip 1 leading blank
+ e = e[1:]
+ out += e
+ out += '\n'
+ return out
+
+ ld = unwrap_lines(ld)
+ bls = block_lines(ld)
+ return blocks_to_rst(bls)
+
+
+def underline_text(text, symbol):
+ underline = symbol * len(text)
+ return '%s\n%s\n' % (text, underline)
def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
- print pkg
# local binding for ease of use
pkgdb = db[pkg]
# do nothing if there is not at least the very basic stuff
if not pkgdb['main'].has_key('description'):
return
title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
- underline = '*' * (len(title) + 2)
- title = '%s\n %s\n%s' % (underline, title, underline)
+ title = underline_text(title, '*')
ex_dir = None
if 'sv' in pkgdb['main']:
def write_sourceslist(jinja_env, cfg, outdir):
create_dir(outdir)
- create_dir(os.path.join(outdir, '_static'))
+ create_dir(os.path.join(outdir, 'lists'))
repos = {}
for release in cfg.options('release codenames'):
transrel = trans_codename(release, cfg)
repos[transrel] = []
for mirror in cfg.options('mirrors'):
- listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
+ listname = '%s.%s' % (release, mirror)
repos[transrel].append((mirror, listname))
- lf = open(os.path.join(outdir, '_static', listname), 'w')
+ lf = open(os.path.join(outdir, 'lists', listname), 'w')
for rel in ('data', release):
aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
rel)
lf.write('#deb-src %s' % aptcfg)
lf.close()
+ id2codename = dict([(cfg.get('release backport ids', r), r)
+ for r in cfg.options('release codenames')])
+ id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
+ for r in cfg.options('release codenames')])
+ mirror2name = dict([(m, cfg.get('mirror names', m))
+ for m in cfg.options('mirrors')])
+ mirror2url = dict([(m, cfg.get('mirrors', m))
+ for m in cfg.options('mirrors')])
srclist_template = jinja_env.get_template('sources_lists.rst')
sl = open(os.path.join(outdir, 'sources_lists'), 'w')
- sl.write(srclist_template.render(repos=repos))
+ sl.write(srclist_template.render(id2codename=id2codename,
+ id2relname=id2relname,
+ mirror2name=mirror2name,
+ mirror2url=mirror2url))
sl.close()
+def sort_by_tasks(db):
+    """Map blend task names onto the list of packages associated with them.
+
+    Packages lacking 'blends' or 'tasks' entries are silently skipped.
+    """
+    tasks = {}
+    for pkg in db.keys():
+        if not 'blends' in db[pkg]:
+            # no blend info
+            continue
+        blendinfo = db[pkg]['blends']
+        if not 'tasks' in blendinfo:
+            # no task info in blend data
+            continue
+        taskinfo = blendinfo['tasks']
+        for task in taskinfo:
+            taskname = task[1]
+            if not taskname in tasks:
+                tasks[taskname] = []
+            # BUG FIX: append unconditionally -- the original 'else' branch
+            # dropped the first package seen for every task
+            tasks[taskname].append(pkg)
+    return tasks
+
+
+def sort_by_maintainer(db):
+    """Map maintainer names onto the list of packages they maintain.
+
+    Maintainer info is taken from blends 'responsible', overridden by
+    'original_maintainer'/'maintainer' from the main section. Email parts
+    and non-ascii names are discarded; package lists are de-duplicated.
+    """
+    maints = {}
+    maint_ids = {}
+    for pkg in db.keys():
+        maint = None
+        pkginfo = db[pkg]
+        # start with the blends info
+        if 'blends' in pkginfo and 'responsible' in pkginfo['blends']:
+            maint = pkginfo['blends']['responsible']
+        if not 'main' in db[pkg] and maint is None:
+            # no info
+            continue
+        info = db[pkg]['main']
+        if not 'maintainer' in info and maint is None:
+            # no maintainer info
+            continue
+        if 'original_maintainer' in info and not info['original_maintainer'] is None:
+            maint = info['original_maintainer']
+        elif 'maintainer' in info and not info['maintainer'] is None:
+            maint = info['maintainer']
+        if maint is None:
+            # no sane maintainer info
+            continue
+        # safeguard: <> confuses sphinx and we don't care about different emails
+        if '<' in maint:
+            # BUG FIX: only strip the email when one is present; the original
+            # sliced with find()'s -1 and lost the name's last character
+            maint = maint[:maint.index('<')]
+        maint = maint.strip()
+        # kick out non-ascii ones (should not be, but too tired to find the bug)
+        try:
+            codecs.ascii_decode(maint)
+        except UnicodeError:
+            # BUG FIX: ascii_decode() raises UnicodeDecodeError for non-ascii
+            # byte strings; UnicodeError covers both encode and decode failures
+            continue
+        if not maint.lower() in maints:
+            maints[maint.lower()] = []
+            maint_ids[maint.lower()] = [maint]
+        else:
+            maint_ids[maint.lower()].append(maint)
+        maints[maint.lower()].append(pkg)
+    # remove duplicates
+    out = {}
+    for m in maints:
+        out[maint_ids[m][0]] = np.unique(maints[m])
+    return out
+
+
+def sort_by_release(db):
+    """Map release codenames onto the list of packages present in them.
+
+    Release sections are identified as tuple keys in each package's db
+    entry; the first tuple element is the release codename.
+    """
+    rels = {}
+    for pkg in db.keys():
+        pkginfo = db[pkg]
+        for sec in pkginfo:
+            if not isinstance(sec, tuple):
+                # only releases are of interest
+                continue
+            relname = sec[0]
+            if not relname in rels:
+                rels[relname] = []
+            # BUG FIX: append unconditionally -- the original 'else' branch
+            # dropped the first package seen for every release
+            rels[relname].append(pkg)
+    # remove duplicates
+    for r in rels:
+        rels[r] = np.unique(rels[r])
+    return rels
+
+
def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
create_dir(outdir)
create_dir(os.path.join(outdir, 'pkgs'))
-
- # generate the TOC with all packages
+ create_dir(os.path.join(outdir, 'pkglists'))
+ # template for individual package listings
toc_template = jinja_env.get_template('pkgs_toc.rst')
- toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
- # this is a fragile test
- toc.write(toc_template.render(
- pkgs=[k for k in db.keys()
- if not ('Datasets', 'neurodebian-data') in db[k]]))
- toc.close()
- # and now only for dataset packages
- toc_template = jinja_env.get_template('datasets_toc.rst')
- toc = codecs.open(os.path.join(outdir, 'datasets.rst'), 'w', 'utf-8')
- # this is a fragile test
- toc.write(toc_template.render(
- pkgs=[k for k in db.keys()
- if ('Datasets', 'neurodebian-data') in db[k]]))
+ # the high-level package list overview
+ hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
+ hltoc.write('.. _pkglists:\n\n')
+ hltoc.write(underline_text('Software packages', '='))
+ defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
+ (sort_by_release(db), 'By release', 'Packages for %s'),
+ (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
+ for def_ in defs:
+ # TOC for each thingie
+ pkgsdict, sectitle, title_tmpl = def_
+ hltoc.write(underline_text(sectitle, '-'))
+ ids = pkgsdict.keys()
+ ids.sort()
+ for id_ in ids:
+ label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
+ # filter out crap
+ filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
+ if not len(filtered_pkgs):
+ continue
+ plist = toc_template.render(
+ label=label,
+ title=underline_text(title_tmpl % id_, '='),
+ pkgs=filtered_pkgs,
+ db=db)
+ toc = codecs.open(os.path.join(outdir,
+ 'pkglists',
+ '%s.rst' % label),
+ 'w', 'utf-8')
+ toc.write(plist)
+ toc.close()
+ hltoc.write('* :ref:`%s`\n' % label)
+ hltoc.write('\n\n')
+
+
+ # now a complete list of all packages
+ hltoc.write(underline_text('Complete list', '-'))
+ toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
+ 'w', 'utf-8')
+ toc.write(toc_template.render(label='full_pkg_list',
+ title=underline_text('Complete package list', '='),
+ pkgs=db.keys(), db=db))
toc.close()
-
+ hltoc.write('* :ref:`full_pkg_list`\n')
+ hltoc.close()
# and now each individual package page
pkg_template = jinja_env.get_template('pkg.rst')
cfg = SafeConfigParser()
cfg.read(opts.cfg)
+ if cmd == 'debug_ld':
+ # load the db from file
+ db = read_db(opts.db)
+
+ for p in db.keys():
+ #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']:
+ if not 'long_description' in db[p]['main']:
+ continue
+ ld = db[p]['main']['long_description']
+
+ print ">>>>>>>>> ", p
+ #print ld
+ print "----"
+ print convert_longdescr(ld)
+ raise SystemExit
# load existing db, unless renew is requested
if cmd == 'updatedb':
db = {}