X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=neurodebian%2Fdde.py;h=442123e253f5370d3ac068d3d0f8046d1b8569c2;hb=4d62dbbcd335fe951760dfe16d0aa27cb4f745cb;hp=c80ab18ceac10a347ec1a00542dd15b53688c979;hpb=8e10fad174a8de48fd05686b09370a0a58ec787c;p=neurodebian.git diff --git a/neurodebian/dde.py b/neurodebian/dde.py index c80ab18..442123e 100755 --- a/neurodebian/dde.py +++ b/neurodebian/dde.py @@ -5,6 +5,8 @@ import pysvn import json from debian_bundle import deb822 +import numpy as np +import jinja2 # Lets first assure no guarding (but annoying) warnings import warnings @@ -510,7 +512,7 @@ def import_dde(cfg, db): if q.has_key('popcon'): db[p]['main']['debian_popcon'] = q['popcon'] # if we have debian, need to get ubuntu - q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p) + q = dde_get(query_url + "/packages/prio-ubuntu-oneiric/%s" % p) if q and q.has_key('popcon'): db[p]['main']['ubuntu_popcon'] = q['popcon'] else: @@ -572,34 +574,127 @@ def assure_unicode(s): def convert_longdescr(ld): + """ + + yoh: I think all this long description conversion will keep giving + us problems since per se there is no strict regulations, + especially in blends files + """ + descr = u'' ld = ld.replace('% ', '%% ') + ld = ld.replace(r'\t', ' ') # just in case assuming tab 4 ld = ld.split('\n') - for i, l in enumerate(ld): - if l == ' .': - ld[i] = ' #NEWLINEMARKER#' - # look for embedded lists - elif len(l) >=3 and l[:2] == ' ' and l[2] in '-*': - ld[i] = ' #NEWLINEMARKER# ' + l[2:] - - ld = u' '.join([l[1:] for l in ld]) - ld = ld.replace('#NEWLINEMARKER# ', '\n\n') - # cleanup any leftover (e.g. trailing markers) - ld = ld.replace('#NEWLINEMARKER#', '') - # safe-guard ReST active symbols - ld = re.sub(r'([\'`*])', r'\\\1', ld) - return ld + re_leadblanks = re.compile("^ *") + re_itemized = re.compile("^[o*-+] +") + re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$") + re_description_gr = re.compile("^( *[^-]+ - )(.*?)$") + + def unwrap_lines(lines): + out = [] + indent_levels = [-1] + for l in lines: + match = re_itemized_gr.search(l).groups() + if ((len(match[0]) in indent_levels and match[1] is None) + or (len(match[0]) > max(indent_levels)+4)) \ + and match[2].strip() != '.': + # append to previous + if not out[-1].endswith(" "): + out[-1] += " " + out[-1] += match[2] + else: + out.append(l) + + indent_levels = [len(match[0])] + if match[1] is not None: + indent_levels += [len(match[0]) + len(match[1])] + if match[2].strip() == '.': + # reset though if '.' + indent_levels = [-1] + return out + + def dedent_withlevel(lines): + """Dedent `lines` given in a list provide dedented lines and how much was dedented + """ + nleading = min([re_leadblanks.search(l).span()[1] + for l in lines]) + return [l[nleading:] for l in lines], nleading + + def block_lines(ld, level=0): + # so we got list of lines + # dedent all of them first + ld, level = dedent_withlevel(ld) + + # lets collect them in blocks/paragraphs + # 1. into paragraphs split by '.' + blocks, block = [], None + + # next block can begin if + # 1. . line + # 2. it was an itemized list and all items begin with + # the same symbol or get further indented accordingly + # so let's first check if it is an itemized list + itemized_match = re_itemized.search(ld[0]) + if itemized_match: + allow_indents = " "*itemized_match.span()[1] + else: + allow_indents = None + for l in ld: + if block is None or l.strip() == '.' \ + or (len(l) and ( len(block) and ( + (l.startswith(' ') and not block[-1].startswith(' ')) + or + (not l.startswith(' ') and block[-1].startswith(' '))))): + block = [] + blocks.append(block) + if l.strip() != '.': + block.append(l) + if len(blocks) == 1: + return blocks[0] + else: + return [block_lines(b, level+1) for b in blocks if len(b)] + + def blocks_to_rst(bls, level=0): + # check if this block is an itemized beast + #itemized_match = re_itemized_gr.search(bls[0][0]) + #if itemized_match: + # res += ' 'allow_indents = " "*itemized_match.span()[1] + out = '' + for b in bls: + if isinstance(b, list): + if len(b) == 1: + out += " "*level + b[0] + '\n\n' + else: + out += blocks_to_rst(b, level+1) + else: + e = " "*level + b + '\n' + if not re_itemized.search(b): + pass + #e += '\n' + elif len(e) and e[0] == ' ': + # strip 1 leading blank + e = e[1:] + out += e + out += '\n' + return out + + ld = unwrap_lines(ld) + bls = block_lines(ld) + return blocks_to_rst(bls) + + +def underline_text(text, symbol): + underline = symbol * len(text) + return '%s\n%s\n' % (text, underline) def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir): - print pkg # local binding for ease of use pkgdb = db[pkg] # do nothing if there is not at least the very basic stuff if not pkgdb['main'].has_key('description'): return title = '**%s** -- %s' % (pkg, pkgdb['main']['description']) - underline = '*' * (len(title) + 2) - title = '%s\n %s\n%s' % (underline, title, underline) + title = underline_text(title, '*') ex_dir = None if 'sv' in pkgdb['main']: @@ -676,27 +771,138 @@ def write_sourceslist(jinja_env, cfg, outdir): sl.close() +def sort_by_tasks(db): + tasks = {} + for pkg in db.keys(): + if not 'blends' in db[pkg]: + # no blend info + continue + blendinfo = db[pkg]['blends'] + if not 'tasks' in blendinfo: + # no task info in blend data + continue + taskinfo = blendinfo['tasks'] + for task in taskinfo: + taskname = task[1] + if not taskname in tasks: + tasks[taskname] = [] + else: + tasks[taskname].append(pkg) + return tasks + + +def sort_by_maintainer(db): + maints = {} + maint_ids = {} + for pkg in db.keys(): + maint = None + pkginfo = db[pkg] + # start with the blends info + if 'blends' in pkginfo and 'responsible' in pkginfo['blends']: + maint = pkginfo['blends']['responsible'] + if not 'main' in db[pkg] and maint is None: + # no info + continue + info = db[pkg]['main'] + if not 'maintainer' in info and maint is None: + # no maintainer info + continue + if 'original_maintainer' in info and not info['original_maintainer'] is None: + maint = info['original_maintainer'] + elif 'maintainer' in info and not info['maintainer'] is None: + maint = info['maintainer'] + if maint is None: + # no sane maintainer info + continue + # safeguard: <> confuses sphinx and we don't care about different emails + maint = maint[:maint.find('<')].strip() + # kick out non-ascii ones (should not be, but too tired to find the bug) + try: + codecs.ascii_decode(maint) + except UnicodeEncodeError: + continue + if not maint.lower() in maints: + maints[maint.lower()] = [] + maint_ids[maint.lower()] = [maint] + else: + maint_ids[maint.lower()].append(maint) + maints[maint.lower()].append(pkg) + # remove duplicates + out = {} + for m in maints: + out[maint_ids[m][0]] = np.unique(maints[m]) + return out + + +def sort_by_release(db): + rels = {} + for pkg in db.keys(): + pkginfo = db[pkg] + for sec in pkginfo: + if not isinstance(sec, tuple): + # only releases are of interest + continue + relname = sec[0] + if not relname in rels: + rels[relname] = [] + else: + rels[relname].append(pkg) + # remove duplicates + for r in rels: + rels[r] = np.unique(rels[r]) + return rels + + def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir): create_dir(outdir) create_dir(os.path.join(outdir, 'pkgs')) - - # generate the TOC with all packages + create_dir(os.path.join(outdir, 'pkglists')) + # template for individual package listings toc_template = jinja_env.get_template('pkgs_toc.rst') - toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8') - # this is a fragile test - toc.write(toc_template.render( - pkgs=[k for k in db.keys() - if not ('Datasets (data)', 'neurodebian-data') in db[k]])) - toc.close() - # and now only for dataset packages - toc_template = jinja_env.get_template('datasets_toc.rst') - toc = codecs.open(os.path.join(outdir, 'datasets.rst'), 'w', 'utf-8') - # this is a fragile test - toc.write(toc_template.render( - pkgs=[k for k in db.keys() - if ('Datasets (data)', 'neurodebian-data') in db[k]])) + # the high-level package list overview + hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8') + hltoc.write('.. _pkglists:\n\n') + hltoc.write(underline_text('Software packages', '=')) + defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'), + (sort_by_release(db), 'By release', 'Packages for %s'), + (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')] + for def_ in defs: + # TOC for each thingie + pkgsdict, sectitle, title_tmpl = def_ + hltoc.write(underline_text(sectitle, '-')) + ids = pkgsdict.keys() + ids.sort() + for id_ in ids: + label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_') + # filter out crap + filtered_pkgs = [p for p in pkgsdict[id_] if p in db] + if not len(filtered_pkgs): + continue + plist = toc_template.render( + label=label, + title=underline_text(title_tmpl % id_, '='), + pkgs=filtered_pkgs, + db=db) + toc = codecs.open(os.path.join(outdir, + 'pkglists', + '%s.rst' % label), + 'w', 'utf-8') + toc.write(plist) + toc.close() + hltoc.write('* :ref:`%s`\n' % label) + hltoc.write('\n\n') + + + # now a complete list of all packages + hltoc.write(underline_text('Complete list', '-')) + toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'), + 'w', 'utf-8') + toc.write(toc_template.render(label='full_pkg_list', + title=underline_text('Complete package list', '='), + pkgs=db.keys(), db=db)) toc.close() - + hltoc.write('* :ref:`full_pkg_list`\n') + hltoc.close() # and now each individual package page pkg_template = jinja_env.get_template('pkg.rst') @@ -763,6 +969,21 @@ def main(): cfg = SafeConfigParser() cfg.read(opts.cfg) + if cmd == 'debug_ld': + # load the db from file + db = read_db(opts.db) + + for p in db.keys(): + #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']: + if not 'long_description' in db[p]['main']: + continue + ld = db[p]['main']['long_description'] + + print ">>>>>>>>> ", p + #print ld + print "----" + print convert_longdescr(ld) + raise SystemExit # load existing db, unless renew is requested if cmd == 'updatedb': db = {}