Actually adding recently released Ubuntu 11.10 oneiric
[neurodebian.git] / neurodebian / dde.py
old mode 100644 (file)
new mode 100755 (executable)
index 729b799..442123e
@@ -5,17 +5,27 @@
 import pysvn
 import json
 from debian_bundle import deb822
+import numpy as np
+import jinja2
+
+# Let's first silence the guarding (but annoying) warnings
+import warnings
+warnings.simplefilter('ignore', FutureWarning)
+warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
+
 import apt
 from ConfigParser import SafeConfigParser
 from optparse import OptionParser, Option, OptionGroup, OptionConflictError
 import sys
 import os
+import copy
 import shutil
 import urllib2
 import urllib
 import codecs
 import subprocess
 import time
+import re
 # templating
 from jinja2 import Environment, PackageLoader
 
@@ -128,6 +138,8 @@ def add_pkgfromtaskfile(db, urls):
         for stanza in deb822.Packages.iter_paragraphs(fh):
             if stanza.has_key('Depends'):
                 pkg = stanza['Depends']
+            elif stanza.has_key('Recommends'):
+                pkg = stanza['Recommends']
             elif stanza.has_key('Suggests'):
                 pkg = stanza['Suggests']
             else:
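
The same Depends/Recommends/Suggests cascade also appears in import_blendstask() below. A sketch (not part of the patch) of how it could be folded into a single loop, assuming the unshown else branch simply skips the stanza:

    pkg = None
    for field in ('Depends', 'Recommends', 'Suggests'):
        if stanza.has_key(field):
            pkg = stanza[field]
            break
    if pkg is None:
        continue   # stanza names no package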
@@ -148,7 +160,7 @@ def add_pkgfromtaskfile(db, urls):
 def get_emptydbentry():
     return {'main': {}}
 
-def import_blendstask(db, url):
+def import_blendstask(cfg, db, url):
     cache = AptListsCache()
     fh = cache.get(url)
     task_name = None
@@ -171,6 +183,8 @@ def import_blendstask(db, url):
 
         if st.has_key('Depends'):
             pkg = st['Depends']
+        elif st.has_key('Recommends'):
+            pkg = st['Recommends']
         elif st.has_key('Suggests'):
             pkg = st['Suggests']
         else:
@@ -213,7 +227,12 @@ def import_blendstask(db, url):
 
             # Publications
             if st.has_key('Published-Title'):
-                pub = {'title': st['Published-Title']}
+                title = st['Published-Title']
+                if title[-1] == '.':
+                    # strip the trailing dot -- it gets added later
+                    pub = {'title': title[:-1]}
+                else:
+                    pub = {'title': title}
                 if st.has_key('Published-Authors'):
                     pub['authors'] = st['Published-Authors']
                 if st.has_key('Published-Year'):
@@ -226,7 +245,7 @@ def import_blendstask(db, url):
                     pub['doi'] = st['Published-DOI']
                     # need at least one URL
                     if not pub.has_key('url'):
-                        pub['url'] = st['Published-DOI']
+                        pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']
 
                 db[p]['main']['publication'] = pub
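
A bare DOI (the old fallback value) is not a resolvable URL; routing it through the dx.doi.org resolver produces a real link. With a hypothetical DOI:

    doi = '10.1000/xyz123'                      # hypothetical value
    url = "http://dx.doi.org/%s" % doi
    # -> 'http://dx.doi.org/10.1000/xyz123'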
 
@@ -246,6 +265,16 @@ def import_blendstask(db, url):
                 # just add this tasks name and id
                 db[p]['blends']['tasks'].append(task)
 
+            # handle pkg name aliases
+            if p in cfg.options('blend package aliases'):
+                src_entry = db[p].copy()
+                # remove original entry
+                del db[p]
+                # copy the entry into all aliases
+                for alias in cfg.get('blend package aliases', p).split():
+                    print "Aliasing %s to %s" % (p, alias)
+                    db[alias] = copy.deepcopy(src_entry)
+
     return db
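
copy.deepcopy() matters for the aliasing above because entries are nested dicts: a shallow copy would leave all aliases sharing the same inner 'main'/'blends' dicts. A quick demonstration with a hypothetical entry:

    import copy

    src = {'main': {'description': 'some tool'}}
    alias = src.copy()                  # shallow: inner dict is shared
    alias['main']['description'] = 'mutated'
    print src['main']['description']    # 'mutated' -- leaked into the source
    deep = copy.deepcopy(src)           # deep: fully independent
    deep['main']['description'] = 'isolated'
    print src['main']['description']    # still 'mutated' -- unaffected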
 
 
@@ -407,6 +436,56 @@ def dde_get(url, fail=False):
     except (StopIteration):
         print "NOINFO:", url
         return False
+    except json.ReadException, e:
+        print "UDD-DOWN?:", url, type(e)
+        return False
+
+
+def nitrc_get(spec, fail=False):
+    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
+    try:
+        # change into this from python 2.6 on
+        #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
+        data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
+        print "NITRC-SUCCESS:", spec
+    except urllib2.HTTPError, e:
+        print "NITRC-NOINFO:", spec, type(e)
+        return False
+    except urllib2.URLError, e:
+        print "NITRC-URLERROR:", spec, type(e)
+        if fail:
+            print "Permanant failure"
+            return False
+        print "Try again after 30 seconds..."
+        time.sleep(30)
+        return nitrc_get(spec, fail=True)
+    return data
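
On a transient URLError, nitrc_get() waits 30 seconds and recurses once with fail=True. The same retry-once pattern in isolation, with a hypothetical fetch helper:

    import time
    import urllib2

    def fetch_retry_once(url, fail=False):
        try:
            return urllib2.urlopen(url).read()
        except urllib2.URLError:
            if fail:
                return None          # second failure -- give up
            time.sleep(30)
            return fetch_retry_once(url, fail=True)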
+
+
+def parse_nitrc(data):
+    if data is False:
+        return None
+    # simplify -- there is only one project in the data
+    project = data['projects'][0]
+    nitrc_filtered = {'downloads': 0,
+                      'id': project['id']}
+    for pkg in project['packages']:
+        for release in pkg['releases']:
+            for f in release['files']:
+                nitrc_filtered['downloads'] += f['download_count']
+    return nitrc_filtered
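
parse_nitrc() assumes a projects -> packages -> releases -> files hierarchy; the field names below are inferred from this code, not from any NITRC documentation. A hypothetical response it would accept:

    data = {'projects': [
        {'id': 'some-project',
         'packages': [
             {'releases': [
                 {'files': [{'download_count': 120},
                            {'download_count': 34}]},
             ]},
         ]},
    ]}
    print parse_nitrc(data)   # {'downloads': 154, 'id': 'some-project'}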
+
+
+def import_nitrc(cfg, db):
+    for p in db.keys():
+        if not cfg.has_option("nitrc ids", p):
+            continue
+        nitrc_spec = cfg.get("nitrc ids", p)
+        nitrc_data = nitrc_get(nitrc_spec)
+        nitrc_excerpt = parse_nitrc(nitrc_data)
+        if nitrc_excerpt is not None:
+            db[p]['nitrc'] = nitrc_excerpt
+    return db
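
import_nitrc() only queries packages listed in a 'nitrc ids' config section mapping package names to NITRC project specs. A self-contained sketch with a hypothetical section:

    from ConfigParser import SafeConfigParser
    from StringIO import StringIO

    cfg = SafeConfigParser()
    cfg.readfp(StringIO("[nitrc ids]\nsomepkg = some-nitrc-spec\n"))
    print cfg.get("nitrc ids", "somepkg")   # 'some-nitrc-spec'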
 
 
 def import_dde(cfg, db):
@@ -433,7 +512,7 @@ def import_dde(cfg, db):
                 if q.has_key('popcon'):
                     db[p]['main']['debian_popcon'] = q['popcon']
                 # if we have debian, need to get ubuntu
-                q = dde_get(query_url + "/packages/prio-ubuntu-karmic/%s" % p)
+                q = dde_get(query_url + "/packages/prio-ubuntu-oneiric/%s" % p)
                 if q and q.has_key('popcon'):
                     db[p]['main']['ubuntu_popcon'] = q['popcon']
             else:
@@ -475,41 +554,163 @@ def import_dde(cfg, db):
 
     return db
 
+def assure_unicode(s):
+    """Assure that argument is unicode
+
+    Necessary if strings are not carrying out Pythonish 'u' prefix to
+    signal UTF8 strings, but are in fact UTF8
+    """
+    if type(s) is unicode:
+        return s
+    elif type(s) is str:
+        # attempt a regular unicode() call, and if that fails --
+        # just decode it as UTF-8
+        try:
+            return unicode(s)
+        except UnicodeDecodeError, e:
+            return s.decode('utf8')
+    else:
+        return assure_unicode(str(s))
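
assure_unicode() under Python 2 string semantics -- every branch ends in unicode:

    print type(assure_unicode(u'caf\xe9'))      # unicode, passed through
    print type(assure_unicode('plain ascii'))   # unicode, via unicode()
    print type(assure_unicode('caf\xc3\xa9'))   # unicode, via the .decode('utf8') fallback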
+
 
 def convert_longdescr(ld):
+    """
+
+    yoh: I think all this long description conversion will keep giving
+    us problems since per se there is no strict regulations,
+    especially in blends files
+    """
+    descr = u''
     ld = ld.replace('% ', '%% ')
+    ld = ld.replace(r'\t', '    ') # just in case, assuming a tab width of 4
     ld = ld.split('\n')
-    for i, l in enumerate(ld):
-        if l == ' .':
-            ld[i] = ' #NEWLINEMARKER#'
-        # look for embedded lists
-        elif len(l) >=3 and l[:2] == '  ' and l[2] in '-*':
-            ld[i] = ' #NEWLINEMARKER# ' + l[2:]
+    re_leadblanks = re.compile("^ *")
+    re_itemized = re.compile("^[-o*+] +")
+    re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$")
+    re_description_gr = re.compile("^( *[^-]+ - )(.*?)$")
+
+    def unwrap_lines(lines):
+        out = []
+        indent_levels = [-1]
+        for l in lines:
+            match = re_itemized_gr.search(l).groups()
+            if ((len(match[0]) in indent_levels and match[1] is None)
+                or (len(match[0]) > max(indent_levels)+4)) \
+                and match[2].strip() != '.':
+                # append to previous
+                if not out[-1].endswith(" "):
+                    out[-1] += " "
+                out[-1] += match[2]
+            else:
+                out.append(l)
+
+            indent_levels = [len(match[0])]
+            if match[1] is not None:
+                indent_levels += [len(match[0]) + len(match[1])]
+            if match[2].strip() == '.':
+                # reset though if '.'
+                indent_levels = [-1]
+        return out
+
+    def dedent_withlevel(lines):
+        """Dedent `lines` given in a list provide dedented lines and how much was dedented
+        """
+        nleading = min([re_leadblanks.search(l).span()[1]
+                        for l in lines])
+        return [l[nleading:] for l in lines], nleading
+
+    def block_lines(ld, level=0):
+        # so we got list of lines
+        # dedent all of them first
+        ld, level = dedent_withlevel(ld)
+
+        # let's collect them in blocks/paragraphs
+        # 1. into paragraphs split by '.'
+        blocks, block = [], None
+
+        # a new block can begin if
+        #  1. a '.' line is encountered, or
+        #  2. it was an itemized list and all items begin with
+        #     the same symbol or get further indented accordingly
+        # so let's first check whether it is an itemized list
+        itemized_match = re_itemized.search(ld[0])
+        if itemized_match:
+            allow_indents = " "*itemized_match.span()[1]
+        else:
+            allow_indents = None
+        for l in ld:
+            if block is None or l.strip() == '.' \
+                   or (len(l) and ( len(block) and (
+                (l.startswith(' ') and not block[-1].startswith(' '))
+                or
+                (not l.startswith(' ') and block[-1].startswith(' '))))):
+                block = []
+                blocks.append(block)
+            if l.strip() != '.':
+                block.append(l)
+        if len(blocks) == 1:
+            return blocks[0]
+        else:
+            return [block_lines(b, level+1) for b in blocks if len(b)]
+
+    def blocks_to_rst(bls, level=0):
+        out = ''
+        for b in bls:
+            if isinstance(b, list):
+                if len(b) == 1:
+                    out += " "*level + b[0] + '\n\n'
+                else:
+                    out += blocks_to_rst(b, level+1)
+            else:
+                e = " "*level + b + '\n'
+                if re_itemized.search(b) and len(e) and e[0] == ' ':
+                    # itemized entry -- strip 1 leading blank
+                    e = e[1:]
+                out += e
+        out += '\n'
+        return out
+
+    ld = unwrap_lines(ld)
+    bls = block_lines(ld)
+    return blocks_to_rst(bls)
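
A hypothetical Debian-style long description (one leading blank per line, ' .' as paragraph separator) run through the converter; as the docstring warns, the exact output depends on the heuristics above:

    ld = (" A toolkit for neuroimaging analysis.\n"
          " .\n"
          " Features:\n"
          "  - image registration\n"
          "  - tissue segmentation")
    # expected: unwrapped paragraphs separated by blank lines,
    # with the itemized list kept intact
    print convert_longdescr(ld)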
 
-    ld = u' '.join([l[1:] for l in ld])
-    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
-    # cleanup any leftover (e.g. trailing markers)
-    ld = ld.replace('#NEWLINEMARKER#', '')
-    return ld
 
+def underline_text(text, symbol):
+    underline = symbol * len(text)
+    return '%s\n%s\n' % (text, underline)
 
-def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
+
+def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
     # local binding for ease of use
     pkgdb = db[pkg]
     # do nothing if there is not at least the very basic stuff
     if not pkgdb['main'].has_key('description'):
         return
     title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
-    underline = '*' * (len(title) + 2)
-    title = '%s\n %s\n%s' % (underline, title, underline)
+    title = underline_text(title, '*')
 
+    ex_dir = None
+    if 'sv' in pkgdb['main']:
+        ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
+        if not os.path.exists(ex_dir):
+            ex_dir = None
     page = template.render(
             pkg=pkg,
             title=title,
-            long_description=convert_longdescr(pkgdb['main']['long_description']),
+            long_description=convert_longdescr(
+                assure_unicode(pkgdb['main']['long_description'])),
             cfg=cfg,
             db=pkgdb,
-            fulldb=db)
+            fulldb=db,
+            extracts_dir=ex_dir,
+            op=os.path)
     # the following can be replaced by something like
     # {% include "sidebar.html" ignore missing %}
     # in the template whenever jinja 2.2 becomes available
@@ -533,42 +734,180 @@ def read_db(filename):
 
 def write_sourceslist(jinja_env, cfg, outdir):
     create_dir(outdir)
-    create_dir(os.path.join(outdir, '_static'))
+    create_dir(os.path.join(outdir, 'lists'))
 
     repos = {}
     for release in cfg.options('release codenames'):
+        if release == 'data':
+            # no separate list for the data archive
+            continue
         transrel = trans_codename(release, cfg)
         repos[transrel] = []
         for mirror in cfg.options('mirrors'):
-            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
+            listname = '%s.%s' % (release, mirror)
             repos[transrel].append((mirror, listname))
-            lf = open(os.path.join(outdir, '_static', listname), 'w')
-            aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
-                                                      release)
-            lf.write('deb %s' % aptcfg)
-            lf.write('deb-src %s' % aptcfg)
+            lf = open(os.path.join(outdir, 'lists', listname), 'w')
+            for rel in ('data', release):
+                aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
+                                                          rel)
+                lf.write('deb %s' % aptcfg)
+                lf.write('#deb-src %s' % aptcfg)
             lf.close()
 
+    id2codename = dict([(cfg.get('release backport ids', r), r)
+                            for r in cfg.options('release codenames')])
+    id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
+                            for r in cfg.options('release codenames')])
+    mirror2name = dict([(m, cfg.get('mirror names', m))
+                            for m in cfg.options('mirrors')])
+    mirror2url = dict([(m, cfg.get('mirrors', m))
+                            for m in cfg.options('mirrors')])
     srclist_template = jinja_env.get_template('sources_lists.rst')
     sl = open(os.path.join(outdir, 'sources_lists'), 'w')
-    sl.write(srclist_template.render(repos=repos))
+    sl.write(srclist_template.render(id2codename=id2codename,
+                                     id2relname=id2relname,
+                                     mirror2name=mirror2name,
+                                     mirror2url=mirror2url))
     sl.close()
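
With the inner loop above, every generated list pairs the shared 'data' archive with the release itself. For a hypothetical mirror URL http://mirror.example.org/debian and the oneiric release, a list file would read:

    deb http://mirror.example.org/debian data main contrib non-free
    #deb-src http://mirror.example.org/debian data main contrib non-free
    deb http://mirror.example.org/debian oneiric main contrib non-free
    #deb-src http://mirror.example.org/debian oneiric main contrib non-free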
 
 
-def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
+def sort_by_tasks(db):
+    tasks = {}
+    for pkg in db.keys():
+        if not 'blends' in db[pkg]:
+            # no blend info
+            continue
+        blendinfo = db[pkg]['blends']
+        if not 'tasks' in blendinfo:
+            # no task info in blend data
+            continue
+        taskinfo = blendinfo['tasks']
+        for task in taskinfo:
+            taskname = task[1]
+            if taskname not in tasks:
+                tasks[taskname] = []
+            tasks[taskname].append(pkg)
+    return tasks
+
+
+def sort_by_maintainer(db):
+    maints = {}
+    maint_ids = {}
+    for pkg in db.keys():
+        maint = None
+        pkginfo = db[pkg]
+        # start with the blends info
+        if 'blends' in pkginfo and 'responsible' in pkginfo['blends']:
+            maint = pkginfo['blends']['responsible']
+        if 'main' not in db[pkg] and maint is None:
+            # no info
+            continue
+        # the 'main' section might be missing if only blends info was found
+        info = db[pkg].get('main', {})
+        if not 'maintainer' in info and maint is None:
+            # no maintainer info
+            continue
+        if 'original_maintainer' in info and info['original_maintainer'] is not None:
+            maint = info['original_maintainer']
+        elif 'maintainer' in info and info['maintainer'] is not None:
+            maint = info['maintainer']
+        if maint is None:
+            # no sane maintainer info
+            continue
+        # safeguard: <> confuses sphinx and we don't care about different emails
+        maint = maint.split('<', 1)[0].strip()
+        # kick out non-ascii ones (should not happen, but too tired to find the bug)
+        try:
+            codecs.ascii_decode(maint)
+        except (UnicodeDecodeError, UnicodeEncodeError):
+            continue
+        if not maint.lower() in maints:
+            maints[maint.lower()] = []
+            maint_ids[maint.lower()] = [maint]
+        else:
+            maint_ids[maint.lower()].append(maint)
+        maints[maint.lower()].append(pkg)
+    # remove duplicates
+    out = {}
+    for m in maints:
+        out[maint_ids[m][0]] = np.unique(maints[m])
+    return out
+
+
+def sort_by_release(db):
+    rels = {}
+    for pkg in db.keys():
+        pkginfo = db[pkg]
+        for sec in pkginfo:
+            if not isinstance(sec, tuple):
+                # only releases are of interest
+                continue
+            relname = sec[0]
+            if relname not in rels:
+                rels[relname] = []
+            rels[relname].append(pkg)
+    # remove duplicates
+    for r in rels:
+        rels[r] = np.unique(rels[r])
+    return rels
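
np.unique() deduplicates and sorts in one step, which is why the per-release (and per-maintainer) package lists come out ordered:

    import numpy as np
    print np.unique(['zzz-tool', 'afni', 'afni', 'fsl'])
    # -> ['afni' 'fsl' 'zzz-tool']   (duplicates dropped, lexically sorted)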
+
+
+def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
     create_dir(outdir)
     create_dir(os.path.join(outdir, 'pkgs'))
-
-    # generate the TOC with all packages
+    create_dir(os.path.join(outdir, 'pkglists'))
+    # template for individual package listings
     toc_template = jinja_env.get_template('pkgs_toc.rst')
-    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
-    toc.write(toc_template.render(pkgs=db.keys()))
+    # the high-level package list overview
+    hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
+    hltoc.write('.. _pkglists:\n\n')
+    hltoc.write(underline_text('Software packages', '='))
+    defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
+            (sort_by_release(db), 'By release', 'Packages for %s'),
+            (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
+    for def_ in defs:
+        # TOC for each thingie
+        pkgsdict, sectitle, title_tmpl = def_
+        hltoc.write(underline_text(sectitle, '-'))
+        ids = pkgsdict.keys()
+        ids.sort()
+        for id_ in ids:
+            label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
+            # filter out crap
+            filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
+            if not len(filtered_pkgs):
+                continue
+            plist = toc_template.render(
+                        label=label,
+                        title=underline_text(title_tmpl % id_, '='),
+                        pkgs=filtered_pkgs,
+                        db=db)
+            toc = codecs.open(os.path.join(outdir,
+                                           'pkglists',
+                                           '%s.rst' % label),
+                              'w', 'utf-8')
+            toc.write(plist)
+            toc.close()
+            hltoc.write('* :ref:`%s`\n' % label)
+        hltoc.write('\n\n')
+
+
+    # now a complete list of all packages
+    hltoc.write(underline_text('Complete list', '-'))
+    toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
+                      'w', 'utf-8')
+    toc.write(toc_template.render(label='full_pkg_list',
+                title=underline_text('Complete package list', '='),
+                pkgs=db.keys(), db=db))
     toc.close()
+    hltoc.write('* :ref:`full_pkg_list`\n')
+    hltoc.close()
 
     # and now each individual package page
     pkg_template = jinja_env.get_template('pkg.rst')
     for p in db.keys():
-        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir)
+        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
         # when no page is available skip this package
         if page is None:
             continue
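
The section labels written above are derived from the section title and group id, lowercased with spaces and slashes mapped to underscores; for a hypothetical maintainer group:

    sectitle, id_ = 'By maintainer', 'Jane Doe'   # hypothetical values
    label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
    print label   # 'pkgs-by_maintainer-jane_doe'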
@@ -603,6 +942,9 @@ def prepOptParser(op):
     op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                   type="string", default=None, help="None")
 
+    op.add_option("--extracts", action="store", dest="extracts_dir",
+                  type="string", default=None, help="None")
+
 
 def main():
     op = OptionParser(version="%prog 0.0.2")
@@ -627,6 +969,21 @@ def main():
     cfg = SafeConfigParser()
     cfg.read(opts.cfg)
 
+    if cmd == 'debug_ld':
+        # load the db from file
+        db = read_db(opts.db)
+
+        for p in db.keys():
+        #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']:
+            if not 'long_description' in db[p]['main']:
+                continue
+            ld = db[p]['main']['long_description']
+
+            print ">>>>>>>>> ", p
+            #print ld
+            print "----"
+            print convert_longdescr(ld)
+        raise SystemExit
     # load existing db, unless renew is requested
     if cmd == 'updatedb':
         db = {}
@@ -643,7 +1000,7 @@ def main():
         # get info from task files
         if cfg.has_option('packages', 'prospective'):
             for url in cfg.get('packages', 'prospective').split():
-                db = import_blendstask(db, url)
+                db = import_blendstask(cfg, db, url)
 
         # parse NeuroDebian repository
         if cfg.has_option('neurodebian', 'releases'):
@@ -652,6 +1009,8 @@ def main():
 
         # collect package information from DDE
         db = import_dde(cfg, db)
+        # get info from NITRC
+        db = import_nitrc(cfg, db)
         # store the new DB
         store_db(db, opts.db)
         # and be done
@@ -664,7 +1023,7 @@ def main():
     jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))
 
     # generate package pages and TOC and write them to files
-    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)
+    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)
 
     write_sourceslist(jinja_env, cfg, opts.outdir)