Replace a good chunk of code with the new bigmess

[neurodebian.git] / neurodebian / dde.py
diff --git a/neurodebian/dde.py b/neurodebian/dde.py

index 143c814ce7d87e2eceffa54d582389d9a2f8e1a3..a85459b2b259736c8db8a48a5d25b9217c2a1a91 100755 (executable)
--- a/neurodebian/dde.py
+++ b/neurodebian/dde.py
@@ -4,18 +4,21 @@
  
  import pysvn
  import json
-from debian_bundle import deb822
  import numpy as np
-import jinja2
+
+from ConfigParser import SafeConfigParser
+from optparse import OptionParser, OptionGroup, OptionConflictError
  
  # Lets first assure no guarding (but annoying) warnings
  import warnings
  warnings.simplefilter('ignore', FutureWarning)
-warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
+warnings.filterwarnings('ignore',
+                        'Module debian_bundle was already imported.*', UserWarning)
+
+from debian import deb822
+import apt                              # initializes the "_system" ;)
+from apt_pkg import version_compare
  
-import apt
-from ConfigParser import SafeConfigParser
-from optparse import OptionParser, Option, OptionGroup, OptionConflictError
  import sys
  import os
  import copy
@@ -26,6 +29,7 @@ import codecs
  import subprocess
  import time
  import re
+
  # templating
  from jinja2 import Environment, PackageLoader
  
@@ -419,7 +423,7 @@ def dde_get(url, fail=False):
      # enforce delay to be friendly to DDE
      time.sleep(3)
      try:
-        data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
+        data = json.load(urllib2.urlopen(url+"?t=json"))['r']
          print "SUCCESS:", url
          return data
      except urllib2.HTTPError, e:
@@ -436,7 +440,7 @@ def dde_get(url, fail=False):
      except (StopIteration):
          print "NOINFO:", url
          return False
-    except json.ReadException, e:
+    except Exception, e:
          print "UDD-DOWN?:", url, type(e)
          return False
  
@@ -446,7 +450,7 @@ def nitrc_get(spec, fail=False):
      try:
          # change into this from python 2.6 on
          #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
-        data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
+        data = json.load(urllib2.urlopen(nitrc_url + '?spec=%s' % spec))
          print "NITRC-SUCCESS:", spec
      except urllib2.HTTPError, e:
          print "NITRC-NOINFO:", spec, type(e)
@@ -512,7 +516,7 @@ def import_dde(cfg, db):
                  if q.has_key('popcon'):
                      db[p]['main']['debian_popcon'] = q['popcon']
                  # if we have debian, need to get ubuntu
-                q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
+                q = dde_get(query_url + "/packages/prio-ubuntu-precise/%s" % p)
                  if q and q.has_key('popcon'):
                      db[p]['main']['ubuntu_popcon'] = q['popcon']
              else:
@@ -534,8 +538,8 @@ def import_dde(cfg, db):
                  info[distkey]['architecture'] = [info[distkey]['architecture']]
              # accumulate data for multiple over archs
              else:
-                comp = apt.VersionCompare(cp['version'],
-                                          info[distkey]['version'])
+                comp = version_compare(cp['version'],
+                                                   info[distkey]['version'])
                  # found another arch for the same version
                  if comp == 0:
                      info[distkey]['architecture'].append(cp['architecture'])
@@ -574,22 +578,112 @@ def assure_unicode(s):
  
  
  def convert_longdescr(ld):
+    """
+
+    yoh: I think all this long description conversion will keep giving
+    us problems since per se there are no strict regulations,
+    especially in blends files
+    """
+    descr = u''
      ld = ld.replace('% ', '%% ')
+    ld = ld.replace(r'\t', '    ') # just in case assuming tab 4
      ld = ld.split('\n')
-    for i, l in enumerate(ld):
-        if l == ' .':
-            ld[i] = ' #NEWLINEMARKER#'
-        # look for embedded lists
-        elif len(l) >=3 and l[:2] == '  ' and l[2] in '-*':
-            ld[i] = ' #NEWLINEMARKER# ' + l[2:]
-
-    ld = u' '.join([l[1:] for l in ld])
-    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
-    # cleanup any leftover (e.g. trailing markers)
-    ld = ld.replace('#NEWLINEMARKER#', '')
-    # safe-guard ReST active symbols
-    ld = re.sub(r'([\'`*])', r'\\\1', ld)
-    return ld
+    re_leadblanks = re.compile("^ *")
+    re_itemized = re.compile("^[o*-+] +")
+    re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$")
+    re_description_gr = re.compile("^( *[^-]+ - )(.*?)$")
+
+    def unwrap_lines(lines):
+        out = []
+        indent_levels = [-1]
+        for l in lines:
+            match = re_itemized_gr.search(l).groups()
+            if ((len(match[0]) in indent_levels and match[1] is None)
+                or (len(match[0]) > max(indent_levels)+4)) \
+                and match[2].strip() != '.':
+                # append to previous
+                if not out[-1].endswith(" "):
+                    out[-1] += " "
+                out[-1] += match[2]
+            else:
+                out.append(l)
+
+            indent_levels = [len(match[0])]
+            if match[1] is not None:
+                indent_levels += [len(match[0]) + len(match[1])]
+            if match[2].strip() == '.':
+                # reset though if '.'
+                indent_levels = [-1]
+        return out
+
+    def dedent_withlevel(lines):
+        """Dedent `lines` (a list of strings); return the dedented lines and the amount dedented
+        """
+        nleading = min([re_leadblanks.search(l).span()[1]
+                        for l in lines])
+        return [l[nleading:] for l in lines], nleading
+
+    def block_lines(ld, level=0):
+        # so we got list of lines
+        # dedent all of them first
+        ld, level = dedent_withlevel(ld)
+
+        # lets collect them in blocks/paragraphs
+        # 1. into paragraphs split by '.'
+        blocks, block = [], None
+
+        # next block can begin if
+        #  1.  . line
+        #  2. it was an itemized list and all items begin with
+        #     the same symbol or get further indented accordingly
+        #     so let's first check if it is an itemized list
+        itemized_match = re_itemized.search(ld[0])
+        if itemized_match:
+            allow_indents = " "*itemized_match.span()[1]
+        else:
+            allow_indents = None
+        for l in ld:
+            if block is None or l.strip() == '.' \
+                   or (len(l) and ( len(block) and (
+                (l.startswith(' ') and not block[-1].startswith(' '))
+                or
+                (not l.startswith(' ') and block[-1].startswith(' '))))):
+                block = []
+                blocks.append(block)
+            if l.strip() != '.':
+                block.append(l)
+        if len(blocks) == 1:
+            return blocks[0]
+        else:
+            return [block_lines(b, level+1) for b in blocks if len(b)]
+
+    def blocks_to_rst(bls, level=0):
+        # check if this block is an itemized beast
+        #itemized_match = re_itemized_gr.search(bls[0][0])
+        #if itemized_match:
+        #    res += ' 'allow_indents = " "*itemized_match.span()[1]
+        out = ''
+        for b in bls:
+            if isinstance(b, list):
+                if len(b) == 1:
+                    out += " "*level + b[0] + '\n\n'
+                else:
+                    out += blocks_to_rst(b, level+1)
+            else:
+                e = " "*level + b + '\n'
+                if not re_itemized.search(b):
+                    pass
+                    #e += '\n'
+                elif len(e) and e[0] == ' ':
+                    # strip 1 leading blank
+                    e = e[1:]
+                out += e
+        out += '\n'
+        return out
+
+    ld = unwrap_lines(ld)
+    bls = block_lines(ld)
+    return blocks_to_rst(bls)
  
  
  def underline_text(text, symbol):
@@ -611,11 +705,14 @@ def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
          ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
          if not os.path.exists(ex_dir):
              ex_dir = None
+    long_description = 'Description missing'
+    if 'long_description' in pkgdb['main']:
+        long_description=convert_longdescr(
+                    assure_unicode(pkgdb['main']['long_description']))
      page = template.render(
              pkg=pkg,
              title=title,
-            long_description=convert_longdescr(
-                assure_unicode(pkgdb['main']['long_description'])),
+            long_description=long_description,
              cfg=cfg,
              db=pkgdb,
              fulldb=db,
@@ -681,6 +778,25 @@ def write_sourceslist(jinja_env, cfg, outdir):
      sl.close()
  
  
+def write_mirmonlists(cfg, outdir):
+    """Write list of mirrors in the format suitable for mirmon
+
+    It will reuse the same 'lists' directory
+    """
+    print "I: Composing mirmon lists"
+    outdir = os.path.join(outdir, 'lists')
+    create_dir(outdir)
+
+    for sec, sep in (('mirrors', ' '),
+                         ('mirror names', ' - ')):
+        entries = ['%s%s%s' % (mirror, sep, cfg.get(sec, mirror))
+                   for mirror in cfg.options('mirrors')]
+        f = open(os.path.join(outdir, 'mirmon-%s.txt' % sec.replace(' ', '-')),
+                 'w')
+        f.write('\n'.join(entries + ['']))
+        f.close()
+
+
  def sort_by_tasks(db):
      tasks = {}
      for pkg in db.keys():
@@ -703,31 +819,45 @@ def sort_by_tasks(db):
  
  def sort_by_maintainer(db):
      maints = {}
+    maint_ids = {}
      for pkg in db.keys():
-        if not 'main' in db[pkg]:
+        maint = None
+        pkginfo = db[pkg]
+        # start with the blends info
+        if 'blends' in pkginfo and 'responsible' in pkginfo['blends']:
+            maint = pkginfo['blends']['responsible']
+        if not 'main' in db[pkg] and maint is None:
              # no info
              continue
          info = db[pkg]['main']
-        if not 'maintainer' in info:
+        if not 'maintainer' in info and maint is None:
              # no maintainer info
              continue
          if 'original_maintainer' in info and not info['original_maintainer'] is None:
              maint = info['original_maintainer']
-        else:
+        elif 'maintainer' in info and not info['maintainer'] is None:
              maint = info['maintainer']
          if maint is None:
              # no sane maintainer info
              continue
          # safeguard: <> confuses sphinx and we don't care about different emails
          maint = maint[:maint.find('<')].strip()
-        if not maint in maints:
-            maints[maint] = []
+        # kick out non-ascii ones (should not be, but too tired to find the bug)
+        try:
+            codecs.ascii_decode(maint)
+        except UnicodeEncodeError:
+            continue
+        if not maint.lower() in maints:
+            maints[maint.lower()] = []
+            maint_ids[maint.lower()] = [maint]
          else:
-            maints[maint].append(pkg)
+            maint_ids[maint.lower()].append(maint)
+        maints[maint.lower()].append(pkg)
      # remove duplicates
+    out = {}
      for m in maints:
-        maints[m] = np.unique(maints[m])
-    return maints
+        out[maint_ids[m][0]] = np.unique(maints[m])
+    return out
  
  
  def sort_by_release(db):
@@ -767,31 +897,24 @@ def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
          pkgsdict, sectitle, title_tmpl = def_
          hltoc.write(underline_text(sectitle, '-'))
          ids = pkgsdict.keys()
-        for id_ in np.unique(ids):
+        ids.sort()
+        for id_ in ids:
              label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
-            if not len(pkgsdict[id_]):
+            # filter out crap
+            filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
+            if not len(filtered_pkgs):
                  continue
-            try:
-                plist = toc_template.render(
-                            label=label,
-                            title=underline_text(title_tmpl % id_, '='),
-                            pkgs=pkgsdict[id_],
-                            db=db)
-                if not plist:
-                    continue
-                toc = codecs.open(os.path.join(outdir,
-                                               'pkglists',
-                                               '%s.rst' % label),
-                                  'w', 'utf-8')
-                toc.write(toc_template.render(
-                            label=label,
-                            title=underline_text(title_tmpl % id_, '='),
-                            pkgs=pkgsdict[id_],
-                            db=db))
-                toc.close()
-            except jinja2.exceptions.UndefinedError:
-                # ignore crap
-                pass
+            plist = toc_template.render(
+                        label=label,
+                        title=underline_text(title_tmpl % id_, '='),
+                        pkgs=filtered_pkgs,
+                        db=db)
+            toc = codecs.open(os.path.join(outdir,
+                                           'pkglists',
+                                           '%s.rst' % label),
+                              'w', 'utf-8')
+            toc.write(plist)
+            toc.close()
              hltoc.write('* :ref:`%s`\n' % label)
          hltoc.write('\n\n')
  
@@ -872,6 +995,21 @@ def main():
      cfg = SafeConfigParser()
      cfg.read(opts.cfg)
  
+    if cmd == 'debug_ld':
+        # load the db from file
+        db = read_db(opts.db)
+
+        for p in db.keys():
+        #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']:
+            if not 'long_description' in db[p]['main']:
+                continue
+            ld = db[p]['main']['long_description']
+
+            print ">>>>>>>>> ", p
+            #print ld
+            print "----"
+            print convert_longdescr(ld)
+        raise SystemExit
      # load existing db, unless renew is requested
      if cmd == 'updatedb':
          db = {}
@@ -915,5 +1053,7 @@ def main():
  
      write_sourceslist(jinja_env, cfg, opts.outdir)
  
+    write_mirmonlists(cfg, opts.outdir)
+
  if __name__ == "__main__":
      main()