X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=neurodebian%2Fdde.py;h=442123e253f5370d3ac068d3d0f8046d1b8569c2;hb=4c3f3d31f12dde512f5d2cc1056d0263727f29e4;hp=852a488bd9d1820fa106be17f3010d409ae0c961;hpb=9fdfb67d9beafe8b2c7f4fdec75a80284c6819dc;p=neurodebian.git diff --git a/neurodebian/dde.py b/neurodebian/dde.py index 852a488..442123e 100755 --- a/neurodebian/dde.py +++ b/neurodebian/dde.py @@ -5,6 +5,8 @@ import pysvn import json from debian_bundle import deb822 +import numpy as np +import jinja2 # Lets first assure no guarding (but annoying) warnings import warnings @@ -16,12 +18,14 @@ from ConfigParser import SafeConfigParser from optparse import OptionParser, Option, OptionGroup, OptionConflictError import sys import os +import copy import shutil import urllib2 import urllib import codecs import subprocess import time +import re # templating from jinja2 import Environment, PackageLoader @@ -134,6 +138,8 @@ def add_pkgfromtaskfile(db, urls): for stanza in deb822.Packages.iter_paragraphs(fh): if stanza.has_key('Depends'): pkg = stanza['Depends'] + elif stanza.has_key('Recommends'): + pkg = stanza['Recommends'] elif stanza.has_key('Suggests'): pkg = stanza['Suggests'] else: @@ -154,7 +160,7 @@ def add_pkgfromtaskfile(db, urls): def get_emptydbentry(): return {'main': {}} -def import_blendstask(db, url): +def import_blendstask(cfg, db, url): cache = AptListsCache() fh = cache.get(url) task_name = None @@ -177,6 +183,8 @@ def import_blendstask(db, url): if st.has_key('Depends'): pkg = st['Depends'] + elif st.has_key('Recommends'): + pkg = st['Recommends'] elif st.has_key('Suggests'): pkg = st['Suggests'] else: @@ -219,7 +227,12 @@ def import_blendstask(db, url): # Publications if st.has_key('Published-Title'): - pub = {'title': st['Published-Title']} + title = st['Published-Title'] + if title[-1] == '.': + # trip trailing dot -- added later + pub = {'title': title[:-1]} + else: + pub = {'title': title} if st.has_key('Published-Authors'): pub['authors'] = st['Published-Authors'] if st.has_key('Published-Year'): @@ -232,7 +245,7 @@ def import_blendstask(db, url): pub['doi'] = st['Published-DOI'] # need at least one URL if not pub.has_key('url'): - pub['url'] = st['Published-DOI'] + pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI'] db[p]['main']['publication'] = pub @@ -252,6 +265,16 @@ def import_blendstask(db, url): # just add this tasks name and id db[p]['blends']['tasks'].append(task) + # handle pkg name aliases + if p in cfg.options('blend package aliases'): + src_entry = db[p].copy() + # remove original entry + del db[p] + # copy the entry into all aliases + for alias in cfg.get('blend package aliases', p).split(): + print "Aliasing %s to %s" % (p, alias) + db[alias] = copy.deepcopy(src_entry) + return db @@ -421,6 +444,8 @@ def dde_get(url, fail=False): def nitrc_get(spec, fail=False): nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php' try: + # change into this from python 2.6 on + #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read()) data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read()) print "NITRC-SUCCESS:", spec except urllib2.HTTPError, e: @@ -487,7 +512,7 @@ def import_dde(cfg, db): if q.has_key('popcon'): db[p]['main']['debian_popcon'] = q['popcon'] # if we have debian, need to get ubuntu - q = dde_get(query_url + "/packages/prio-ubuntu-karmic/%s" % p) + q = dde_get(query_url + "/packages/prio-ubuntu-oneiric/%s" % p) if q and q.has_key('popcon'): db[p]['main']['ubuntu_popcon'] = q['popcon'] else: @@ -529,41 +554,163 @@ def import_dde(cfg, db): return db +def assure_unicode(s): + """Assure that argument is unicode + + Necessary if strings are not carrying out Pythonish 'u' prefix to + signal UTF8 strings, but are in fact UTF8 + """ + if type(s) is unicode: + return s + elif type(s) is str: + # attempt regular unicode call and if fails -- just decode it + # into utf8 + try: + return unicode(s) + except UnicodeDecodeError, e: + return s.decode('utf8') + else: + return assure_unicode(str(s)) + def convert_longdescr(ld): + """ + + yoh: I think all this long description conversion will keep giving + us problems since per se there is no strict regulations, + especially in blends files + """ + descr = u'' ld = ld.replace('% ', '%% ') + ld = ld.replace(r'\t', ' ') # just in case assuming tab 4 ld = ld.split('\n') - for i, l in enumerate(ld): - if l == ' .': - ld[i] = ' #NEWLINEMARKER#' - # look for embedded lists - elif len(l) >=3 and l[:2] == ' ' and l[2] in '-*': - ld[i] = ' #NEWLINEMARKER# ' + l[2:] + re_leadblanks = re.compile("^ *") + re_itemized = re.compile("^[o*-+] +") + re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$") + re_description_gr = re.compile("^( *[^-]+ - )(.*?)$") + + def unwrap_lines(lines): + out = [] + indent_levels = [-1] + for l in lines: + match = re_itemized_gr.search(l).groups() + if ((len(match[0]) in indent_levels and match[1] is None) + or (len(match[0]) > max(indent_levels)+4)) \ + and match[2].strip() != '.': + # append to previous + if not out[-1].endswith(" "): + out[-1] += " " + out[-1] += match[2] + else: + out.append(l) + + indent_levels = [len(match[0])] + if match[1] is not None: + indent_levels += [len(match[0]) + len(match[1])] + if match[2].strip() == '.': + # reset though if '.' + indent_levels = [-1] + return out + + def dedent_withlevel(lines): + """Dedent `lines` given in a list provide dedented lines and how much was dedented + """ + nleading = min([re_leadblanks.search(l).span()[1] + for l in lines]) + return [l[nleading:] for l in lines], nleading + + def block_lines(ld, level=0): + # so we got list of lines + # dedent all of them first + ld, level = dedent_withlevel(ld) + + # lets collect them in blocks/paragraphs + # 1. into paragraphs split by '.' + blocks, block = [], None + + # next block can begin if + # 1. . line + # 2. it was an itemized list and all items begin with + # the same symbol or get further indented accordingly + # so let's first check if it is an itemized list + itemized_match = re_itemized.search(ld[0]) + if itemized_match: + allow_indents = " "*itemized_match.span()[1] + else: + allow_indents = None + for l in ld: + if block is None or l.strip() == '.' \ + or (len(l) and ( len(block) and ( + (l.startswith(' ') and not block[-1].startswith(' ')) + or + (not l.startswith(' ') and block[-1].startswith(' '))))): + block = [] + blocks.append(block) + if l.strip() != '.': + block.append(l) + if len(blocks) == 1: + return blocks[0] + else: + return [block_lines(b, level+1) for b in blocks if len(b)] + + def blocks_to_rst(bls, level=0): + # check if this block is an itemized beast + #itemized_match = re_itemized_gr.search(bls[0][0]) + #if itemized_match: + # res += ' 'allow_indents = " "*itemized_match.span()[1] + out = '' + for b in bls: + if isinstance(b, list): + if len(b) == 1: + out += " "*level + b[0] + '\n\n' + else: + out += blocks_to_rst(b, level+1) + else: + e = " "*level + b + '\n' + if not re_itemized.search(b): + pass + #e += '\n' + elif len(e) and e[0] == ' ': + # strip 1 leading blank + e = e[1:] + out += e + out += '\n' + return out + + ld = unwrap_lines(ld) + bls = block_lines(ld) + return blocks_to_rst(bls) - ld = u' '.join([l[1:] for l in ld]) - ld = ld.replace('#NEWLINEMARKER# ', '\n\n') - # cleanup any leftover (e.g. trailing markers) - ld = ld.replace('#NEWLINEMARKER#', '') - return ld +def underline_text(text, symbol): + underline = symbol * len(text) + return '%s\n%s\n' % (text, underline) -def generate_pkgpage(pkg, cfg, db, template, addenum_dir): + +def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir): # local binding for ease of use pkgdb = db[pkg] # do nothing if there is not at least the very basic stuff if not pkgdb['main'].has_key('description'): return title = '**%s** -- %s' % (pkg, pkgdb['main']['description']) - underline = '*' * (len(title) + 2) - title = '%s\n %s\n%s' % (underline, title, underline) + title = underline_text(title, '*') + ex_dir = None + if 'sv' in pkgdb['main']: + ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0]) + if not os.path.exists(ex_dir): + ex_dir = None page = template.render( pkg=pkg, title=title, - long_description=convert_longdescr(pkgdb['main']['long_description']), + long_description=convert_longdescr( + assure_unicode(pkgdb['main']['long_description'])), cfg=cfg, db=pkgdb, - fulldb=db) + fulldb=db, + extracts_dir=ex_dir, + op=os.path) # the following can be replaced by something like # {% include "sidebar.html" ignore missing %} # in the template whenever jinja 2.2 becomes available @@ -587,42 +734,180 @@ def read_db(filename): def write_sourceslist(jinja_env, cfg, outdir): create_dir(outdir) - create_dir(os.path.join(outdir, '_static')) + create_dir(os.path.join(outdir, 'lists')) repos = {} for release in cfg.options('release codenames'): + if release == 'data': + # no seperate list for the data archive + continue transrel = trans_codename(release, cfg) repos[transrel] = [] for mirror in cfg.options('mirrors'): - listname = 'neurodebian.%s.%s.sources.list' % (release, mirror) + listname = '%s.%s' % (release, mirror) repos[transrel].append((mirror, listname)) - lf = open(os.path.join(outdir, '_static', listname), 'w') - aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror), - release) - lf.write('deb %s' % aptcfg) - lf.write('deb-src %s' % aptcfg) + lf = open(os.path.join(outdir, 'lists', listname), 'w') + for rel in ('data', release): + aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror), + rel) + lf.write('deb %s' % aptcfg) + lf.write('#deb-src %s' % aptcfg) lf.close() + id2codename = dict([(cfg.get('release backport ids', r), r) + for r in cfg.options('release codenames')]) + id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg)) + for r in cfg.options('release codenames')]) + mirror2name = dict([(m, cfg.get('mirror names', m)) + for m in cfg.options('mirrors')]) + mirror2url = dict([(m, cfg.get('mirrors', m)) + for m in cfg.options('mirrors')]) srclist_template = jinja_env.get_template('sources_lists.rst') sl = open(os.path.join(outdir, 'sources_lists'), 'w') - sl.write(srclist_template.render(repos=repos)) + sl.write(srclist_template.render(id2codename=id2codename, + id2relname=id2relname, + mirror2name=mirror2name, + mirror2url=mirror2url)) sl.close() -def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir): +def sort_by_tasks(db): + tasks = {} + for pkg in db.keys(): + if not 'blends' in db[pkg]: + # no blend info + continue + blendinfo = db[pkg]['blends'] + if not 'tasks' in blendinfo: + # no task info in blend data + continue + taskinfo = blendinfo['tasks'] + for task in taskinfo: + taskname = task[1] + if not taskname in tasks: + tasks[taskname] = [] + else: + tasks[taskname].append(pkg) + return tasks + + +def sort_by_maintainer(db): + maints = {} + maint_ids = {} + for pkg in db.keys(): + maint = None + pkginfo = db[pkg] + # start with the blends info + if 'blends' in pkginfo and 'responsible' in pkginfo['blends']: + maint = pkginfo['blends']['responsible'] + if not 'main' in db[pkg] and maint is None: + # no info + continue + info = db[pkg]['main'] + if not 'maintainer' in info and maint is None: + # no maintainer info + continue + if 'original_maintainer' in info and not info['original_maintainer'] is None: + maint = info['original_maintainer'] + elif 'maintainer' in info and not info['maintainer'] is None: + maint = info['maintainer'] + if maint is None: + # no sane maintainer info + continue + # safeguard: <> confuses sphinx and we don't care about different emails + maint = maint[:maint.find('<')].strip() + # kick out non-ascii ones (should not be, but too tired to find the bug) + try: + codecs.ascii_decode(maint) + except UnicodeEncodeError: + continue + if not maint.lower() in maints: + maints[maint.lower()] = [] + maint_ids[maint.lower()] = [maint] + else: + maint_ids[maint.lower()].append(maint) + maints[maint.lower()].append(pkg) + # remove duplicates + out = {} + for m in maints: + out[maint_ids[m][0]] = np.unique(maints[m]) + return out + + +def sort_by_release(db): + rels = {} + for pkg in db.keys(): + pkginfo = db[pkg] + for sec in pkginfo: + if not isinstance(sec, tuple): + # only releases are of interest + continue + relname = sec[0] + if not relname in rels: + rels[relname] = [] + else: + rels[relname].append(pkg) + # remove duplicates + for r in rels: + rels[r] = np.unique(rels[r]) + return rels + + +def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir): create_dir(outdir) create_dir(os.path.join(outdir, 'pkgs')) - - # generate the TOC with all packages + create_dir(os.path.join(outdir, 'pkglists')) + # template for individual package listings toc_template = jinja_env.get_template('pkgs_toc.rst') - toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8') - toc.write(toc_template.render(pkgs=db.keys())) + # the high-level package list overview + hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8') + hltoc.write('.. _pkglists:\n\n') + hltoc.write(underline_text('Software packages', '=')) + defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'), + (sort_by_release(db), 'By release', 'Packages for %s'), + (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')] + for def_ in defs: + # TOC for each thingie + pkgsdict, sectitle, title_tmpl = def_ + hltoc.write(underline_text(sectitle, '-')) + ids = pkgsdict.keys() + ids.sort() + for id_ in ids: + label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_') + # filter out crap + filtered_pkgs = [p for p in pkgsdict[id_] if p in db] + if not len(filtered_pkgs): + continue + plist = toc_template.render( + label=label, + title=underline_text(title_tmpl % id_, '='), + pkgs=filtered_pkgs, + db=db) + toc = codecs.open(os.path.join(outdir, + 'pkglists', + '%s.rst' % label), + 'w', 'utf-8') + toc.write(plist) + toc.close() + hltoc.write('* :ref:`%s`\n' % label) + hltoc.write('\n\n') + + + # now a complete list of all packages + hltoc.write(underline_text('Complete list', '-')) + toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'), + 'w', 'utf-8') + toc.write(toc_template.render(label='full_pkg_list', + title=underline_text('Complete package list', '='), + pkgs=db.keys(), db=db)) toc.close() + hltoc.write('* :ref:`full_pkg_list`\n') + hltoc.close() # and now each individual package page pkg_template = jinja_env.get_template('pkg.rst') for p in db.keys(): - page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir) + page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir) # when no page is available skip this package if page is None: continue @@ -657,6 +942,9 @@ def prepOptParser(op): op.add_option("--pkgaddenum", action="store", dest="addenum_dir", type="string", default=None, help="None") + op.add_option("--extracts", action="store", dest="extracts_dir", + type="string", default=None, help="None") + def main(): op = OptionParser(version="%prog 0.0.2") @@ -681,6 +969,21 @@ def main(): cfg = SafeConfigParser() cfg.read(opts.cfg) + if cmd == 'debug_ld': + # load the db from file + db = read_db(opts.db) + + for p in db.keys(): + #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']: + if not 'long_description' in db[p]['main']: + continue + ld = db[p]['main']['long_description'] + + print ">>>>>>>>> ", p + #print ld + print "----" + print convert_longdescr(ld) + raise SystemExit # load existing db, unless renew is requested if cmd == 'updatedb': db = {} @@ -697,7 +1000,7 @@ def main(): # get info from task files if cfg.has_option('packages', 'prospective'): for url in cfg.get('packages', 'prospective').split(): - db = import_blendstask(db, url) + db = import_blendstask(cfg, db, url) # parse NeuroDebian repository if cfg.has_option('neurodebian', 'releases'): @@ -720,7 +1023,7 @@ def main(): jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates')) # generate package pages and TOC and write them to files - write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir) + write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir) write_sourceslist(jinja_env, cfg, opts.outdir)