2 """Tell me who you are!
7 from debian_bundle import deb822
11 # Lets first assure no guarding (but annoying) warnings
13 warnings.simplefilter('ignore', FutureWarning)
14 warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
17 from ConfigParser import SafeConfigParser
18 from optparse import OptionParser, Option, OptionGroup, OptionConflictError
30 from jinja2 import Environment, PackageLoader
32 from pprint import PrettyPrinter
35 class AptListsCache(object):
36 def __init__(self, cachedir='build/cache',
39 self.cachedir = cachedir
41 if not ro_cachedirs is None:
42 self.ro_cachedirs = ro_cachedirs
44 self.ro_cachedirs = []
47 create_dir(self.cachedir)
49 def get(self, url, update=False):
50 """Looks in the cache if the file is there and takes the cached one.
51 Otherwise it is downloaded first.
53 Knows how to deal with http:// and svn:// URLs.
58 # look whether it is compressed
59 cext = url.split('.')[-1]
60 if cext in ['gz', 'bz2']:
61 target_url = url[:-1 * len(cext) -1]
63 # assume not compressed
67 # turn url into a filename -- mimik what APT does for
69 tfilename = '_'.join(target_url.split('/')[2:])
71 # if we need to download anyway do not search
73 cfilename = os.path.join(self.cachedir, tfilename)
75 # look for the uncompressed file anywhere in the cache
77 for cp in [self.cachedir] + self.ro_cachedirs:
78 if os.path.exists(os.path.join(cp, tfilename)):
79 cfilename = os.path.join(cp, tfilename)
84 cfilename = os.path.join(self.cachedir, tfilename)
87 # if updated needed -- download
89 #print 'Caching file from %s' % url
91 if url.startswith('svn://'):
93 pysvn.Client().export(url, cfilename)
94 if url.startswith('http://'):
96 tempfile, ignored = urllib.urlretrieve(url)
101 decompressor = 'gzip'
103 decompressor = 'bzip2'
108 "Don't know how to decompress %s files" \
111 if not decompressor is None:
112 if subprocess.call([decompressor, '-d', '-q', '-f',
114 raise RuntimeError, \
115 "Something went wrong while decompressing '%s'" \
118 # move decompressed file into cache
119 shutil.move(os.path.splitext(tempfile)[0], cfilename)
121 # XXX do we need that if explicit filename is provided?
125 fh = codecs.open(cfilename, 'r', 'utf-8')
130 def add_pkgfromtaskfile(db, urls):
131 cache = AptListsCache()
137 # loop over all stanzas
138 for stanza in deb822.Packages.iter_paragraphs(fh):
139 if stanza.has_key('Depends'):
140 pkg = stanza['Depends']
141 elif stanza.has_key('Recommends'):
142 pkg = stanza['Recommends']
143 elif stanza.has_key('Suggests'):
144 pkg = stanza['Suggests']
148 # account for multiple packages per line
150 pkgs += [p.strip() for p in pkg.split(',')]
152 pkgs.append(pkg.strip())
155 if not db.has_key(p):
156 db[p] = get_emptydbentry()
160 def get_emptydbentry():
163 def import_blendstask(cfg, db, url):
164 cache = AptListsCache()
168 # figure out blend's task page URL, since they differ from blend to blend
169 urlsec = url.split('/')
170 blendname = urlsec[-3]
171 if blendname == 'debian-med':
172 taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
173 elif blendname == 'debian-science':
174 taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
176 raise ValueError('Unknown blend "%s"' % blendname)
177 taskpage_url += urlsec[-1]
179 for st in deb822.Packages.iter_paragraphs(fh):
180 if st.has_key('Task'):
181 task_name = st['Task']
182 task = (blendname, task_name, taskpage_url)
184 if st.has_key('Depends'):
186 elif st.has_key('Recommends'):
187 pkg = st['Recommends']
188 elif st.has_key('Suggests'):
191 # print 'Warning: Cannot determine name of prospective package ' \
192 # '... ignoring. Dump follows:'
196 # take care of pkg lists
197 for p in pkg.split(', '):
198 if not db.has_key(p):
199 print 'Ignoring blend package "%s"' % p
205 info['tasks'] = [task]
206 if st.has_key('License'):
207 info['license'] = st['License']
208 if st.has_key('Responsible'):
209 info['responsible'] = st['Responsible']
212 if st.has_key('Pkg-Description'):
213 descr = st['Pkg-Description'].split('\n')
214 info['description'] = descr[0].strip()
215 info['long_description'] = \
216 u'\n'.join(descr[1:])
218 # charge the basic property set
219 db[p]['main']['description'] = info['description']
220 db[p]['main']['long_description'] = info['long_description']
221 if st.has_key('WNPP'):
222 db[p]['main']['debian_itp'] = st['WNPP']
223 if st.has_key('Pkg-URL'):
224 db[p]['main']['other_pkg'] = st['Pkg-URL']
225 if st.has_key('Homepage'):
226 db[p]['main']['homepage'] = st['Homepage']
229 if st.has_key('Published-Title'):
230 title = st['Published-Title']
232 # trip trailing dot -- added later
233 pub = {'title': title[:-1]}
235 pub = {'title': title}
236 if st.has_key('Published-Authors'):
237 pub['authors'] = st['Published-Authors']
238 if st.has_key('Published-Year'):
239 pub['year'] = st['Published-Year']
240 if st.has_key('Published-In'):
241 pub['in'] = st['Published-In']
242 if st.has_key('Published-URL'):
243 pub['url'] = st['Published-URL']
244 if st.has_key('Published-DOI'):
245 pub['doi'] = st['Published-DOI']
246 # need at least one URL
247 if not pub.has_key('url'):
248 pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']
250 db[p]['main']['publication'] = pub
253 if st.has_key('Registration'):
254 db[p]['main']['registration'] = st['Registration']
257 if st.has_key('Remark'):
258 # prepend a single space to make it look like a long description
259 info['remark'] = convert_longdescr(' ' + st['Remark'])
261 # only store if there isn't something already
262 if not db[p].has_key('blends'):
263 db[p]['blends'] = info
265 # just add this tasks name and id
266 db[p]['blends']['tasks'].append(task)
268 # handle pkg name aliases
269 if p in cfg.options('blend package aliases'):
270 src_entry = db[p].copy()
271 # remove original entry
273 # copy the entry into all aliases
274 for alias in cfg.get('blend package aliases', p).split():
275 print "Aliasing %s to %s" % (p, alias)
276 db[alias] = copy.deepcopy(src_entry)
281 def get_releaseinfo(rurl):
282 cache = AptListsCache()
283 # root URL of the repository
284 baseurl = '/'.join(rurl.split('/')[:-1])
285 # get the release file from the cache
286 release_file = cache.get(rurl)
288 # create parser instance
289 rp = deb822.Release(release_file)
291 # architectures on this dist
292 archs = rp['Architectures'].split()
293 components = rp['Components'].split()
294 # compile a new codename that also considers the repository label
295 # to distinguish between official and unofficial repos.
297 origin = rp['Origin']
298 codename = rp['Codename']
299 labelcode = '_'.join([rp['Label'], rp['Codename']])
304 return {'baseurl': baseurl, 'archs': archs, 'components': components,
305 'codename': codename, 'label': label, 'labelcode': labelcode,
def build_pkgsurl(baseurl, component, arch):
    """Assemble the URL of an APT binary Packages.bz2 index.

    Joins the repository base URL with the given component and the
    architecture-specific 'binary-<arch>' subdirectory.
    """
    path_parts = [baseurl, component, 'binary-' + arch, 'Packages.bz2']
    return '/'.join(path_parts)
313 def import_release(cfg, db, rurl):
314 cache = AptListsCache()
316 ri = get_releaseinfo(rurl)
318 # compile the list of Packages files to parse and parse them
319 for c in ri['components']:
320 for a in ri['archs']:
321 # compile packages URL
322 pkgsurl = build_pkgsurl(ri['baseurl'], c, a)
324 # retrieve from cache
325 packages_file = cache.get(pkgsurl)
328 for stanza in deb822.Packages.iter_paragraphs(packages_file):
329 db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])
332 packages_file.close()
336 def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
343 # only care for known packages
344 if not db.has_key(pkg):
345 # print 'Ignoring NeuroDebian package "%s"' % pkg
348 distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)
350 if db[pkg].has_key(distkey):
351 info = db[pkg][distkey]
353 info = {'architecture': []}
356 if not st['Architecture'] in info['architecture']:
357 info['architecture'].append(st['Architecture'])
358 info['maintainer'] = st['Maintainer']
359 if st.has_key('Homepage'):
360 info['homepage'] = st['Homepage']
361 info['version'] = st['Version']
364 info['distribution'] = origin
365 info['release'] = codename
366 info['component'] = component
369 info['poolurl'] = '/'.join([os.path.dirname(st['Filename'])])
372 descr = st['Description'].replace('%', '%%').split('\n')
373 info['description'] = descr[0].strip()
374 info['long_description'] = u'\n'.join(descr[1:])
376 db[pkg][distkey] = info
378 # charge the basic property set
379 db[pkg]['main']['description'] = info['description']
380 db[pkg]['main']['long_description'] = info['long_description']
381 if st.has_key('Source'):
382 db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
384 db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
385 if st.has_key('Homepage'):
386 db[pkg]['main']['homepage'] = st['Homepage']
387 if st.has_key('Recommends'):
388 db[pkg]['main']['recommends'] = st['Recommends']
393 def trans_codename(codename, cfg):
394 """Translate a known codename into a release description.
396 Unknown codenames will simply be returned as is.
398 # if we know something, tell
399 if codename in cfg.options('release codenames'):
400 return cfg.get('release codenames', codename)
405 def create_dir(path):
406 if os.path.exists(path):
409 ps = path.split(os.path.sep)
411 for i in range(1,len(ps) + 1):
412 p = os.path.sep.join(ps[:i])
414 if not os.path.exists(p):
418 def dde_get(url, fail=False):
419 # enforce delay to be friendly to DDE
422 data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
423 print "SUCCESS:", url
425 except urllib2.HTTPError, e:
426 print "NOINFO:", url, type(e)
428 except urllib2.URLError, e:
429 print "URLERROR:", url, type(e)
431 print "Permanant failure"
433 print "Try again after 30 seconds..."
435 return dde_get(url, fail=True)
436 except (StopIteration):
439 except json.ReadException, e:
440 print "UDD-DOWN?:", url, type(e)
444 def nitrc_get(spec, fail=False):
445 nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
447 # change into this from python 2.6 on
448 #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
449 data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
450 print "NITRC-SUCCESS:", spec
451 except urllib2.HTTPError, e:
452 print "NITRC-NOINFO:", spec, type(e)
454 except urllib2.URLError, e:
455 print "NITRC-URLERROR:", spec, type(e)
457 print "Permanant failure"
459 print "Try again after 30 seconds..."
461 return nitrc_get(spec, fail=True)
465 def parse_nitrc(data):
468 # simplify -- there is only one project in the data
469 project = data['projects'][0]
470 nitrc_filtered = {'downloads': 0,
472 for pkg in project['packages']:
473 for release in pkg['releases']:
474 for file in release['files']:
475 nitrc_filtered['downloads'] += file['download_count']
476 return nitrc_filtered
479 def import_nitrc(cfg, db):
481 if not cfg.has_option("nitrc ids", p):
483 nitrc_spec = cfg.get("nitrc ids", p)
484 nitrc_data = nitrc_get(nitrc_spec)
485 nitrc_excerpt = parse_nitrc(nitrc_data)
486 if not nitrc_excerpt is None:
487 db[p]['nitrc'] = nitrc_excerpt
491 def import_dde(cfg, db):
492 query_url = cfg.get('dde', 'pkgquery_url')
495 q = dde_get(query_url + "/packages/all/%s" % p)
497 # copy all stuff, while preserving non-overlapping information
498 for k, v in q.iteritems():
500 # get latest popcon info for debian and ubuntu
501 # cannot use origin field itself, since it is none for few packages
503 origin = q['drc'].split()[0]
504 if origin == 'ubuntu':
505 if q.has_key('popcon'):
506 db[p]['main']['ubuntu_popcon'] = q['popcon']
507 # if we have ubuntu, need to get debian
508 q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
509 if q and q.has_key('popcon'):
510 db[p]['main']['debian_popcon'] = q['popcon']
511 elif origin == 'debian':
512 if q.has_key('popcon'):
513 db[p]['main']['debian_popcon'] = q['popcon']
514 # if we have debian, need to get ubuntu
515 q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
516 if q and q.has_key('popcon'):
517 db[p]['main']['ubuntu_popcon'] = q['popcon']
519 print("Ignoring unkown origin '%s' for package '%s'." \
522 # now get info for package from all releases in UDD
523 q = dde_get(query_url + "/dist/p:%s" % p)
526 # hold all info about this package per distribution release
529 distkey = (trans_codename(cp['release'], cfg),
530 "%s-%s" % (cp['distribution'], cp['release']))
531 if not info.has_key(distkey):
533 # turn into a list to append others later
534 info[distkey]['architecture'] = [info[distkey]['architecture']]
535 # accumulate data for multiple over archs
537 comp = apt.VersionCompare(cp['version'],
538 info[distkey]['version'])
539 # found another arch for the same version
541 info[distkey]['architecture'].append(cp['architecture'])
542 # found newer version, dump the old ones
545 # turn into a list to append others later
546 info[distkey]['architecture'] = [info[distkey]['architecture']]
547 # simply ignore older versions
551 # finally assign the new package data
552 for k, v in info.iteritems():
557 def assure_unicode(s):
558 """Assure that argument is unicode
560 Necessary if strings are not carrying out Pythonish 'u' prefix to
561 signal UTF8 strings, but are in fact UTF8
563 if type(s) is unicode:
566 # attempt regular unicode call and if fails -- just decode it
570 except UnicodeDecodeError, e:
571 return s.decode('utf8')
573 return assure_unicode(str(s))
576 def convert_longdescr(ld):
578 ld = ld.replace('% ', '%% ')
581 for i, l in enumerate(ld):
584 ld[i] = ' #NEWLINEMARKER# '
585 # look for embedded lists
586 elif len(l) >=3 and l[:2] == ' ':
589 ld[i] = ' #NEWLINEMARKER# ' + l[2:]
591 ld[i] = ' \n::\n\n' + l
598 descr = descr.replace('#NEWLINEMARKER# ', '\n\n')
599 # cleanup any leftover (e.g. trailing markers)
600 descr = descr.replace('#NEWLINEMARKER#', '')
601 # safe-guard ReST active symbols
602 descr = re.sub(r'([\'`*])', r'\\\1', descr)
def underline_text(text, symbol):
    """Return *text* underlined ReST-style with a row of *symbol*.

    The underline row has exactly the same length as the text, and the
    result carries a trailing newline.
    """
    return '%s\n%s\n' % (text, symbol * len(text))
611 def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
612 # local binding for ease of use
614 # do nothing if there is not at least the very basic stuff
615 if not pkgdb['main'].has_key('description'):
617 title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
618 title = underline_text(title, '*')
621 if 'sv' in pkgdb['main']:
622 ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
623 if not os.path.exists(ex_dir):
625 page = template.render(
628 long_description=convert_longdescr(
629 assure_unicode(pkgdb['main']['long_description'])),
635 # the following can be replaced by something like
636 # {% include "sidebar.html" ignore missing %}
637 # in the template whenever jinja 2.2 becomes available
638 addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
639 if os.path.exists(addenum):
640 page += '\n\n.. include:: %s\n' % addenum
644 def store_db(db, filename):
645 pp = PrettyPrinter(indent=2)
646 f = codecs.open(filename, 'w', 'utf-8')
647 f.write(pp.pformat(db))
651 def read_db(filename):
652 f = codecs.open(filename, 'r', 'utf-8')
656 def write_sourceslist(jinja_env, cfg, outdir):
658 create_dir(os.path.join(outdir, 'lists'))
661 for release in cfg.options('release codenames'):
662 if release == 'data':
663 # no seperate list for the data archive
665 transrel = trans_codename(release, cfg)
667 for mirror in cfg.options('mirrors'):
668 listname = '%s.%s' % (release, mirror)
669 repos[transrel].append((mirror, listname))
670 lf = open(os.path.join(outdir, 'lists', listname), 'w')
671 for rel in ('data', release):
672 aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
674 lf.write('deb %s' % aptcfg)
675 lf.write('#deb-src %s' % aptcfg)
678 id2codename = dict([(cfg.get('release backport ids', r), r)
679 for r in cfg.options('release codenames')])
680 id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
681 for r in cfg.options('release codenames')])
682 mirror2name = dict([(m, cfg.get('mirror names', m))
683 for m in cfg.options('mirrors')])
684 mirror2url = dict([(m, cfg.get('mirrors', m))
685 for m in cfg.options('mirrors')])
686 srclist_template = jinja_env.get_template('sources_lists.rst')
687 sl = open(os.path.join(outdir, 'sources_lists'), 'w')
688 sl.write(srclist_template.render(id2codename=id2codename,
689 id2relname=id2relname,
690 mirror2name=mirror2name,
691 mirror2url=mirror2url))
695 def sort_by_tasks(db):
697 for pkg in db.keys():
698 if not 'blends' in db[pkg]:
701 blendinfo = db[pkg]['blends']
702 if not 'tasks' in blendinfo:
703 # no task info in blend data
705 taskinfo = blendinfo['tasks']
706 for task in taskinfo:
708 if not taskname in tasks:
711 tasks[taskname].append(pkg)
715 def sort_by_maintainer(db):
718 for pkg in db.keys():
721 # start with the blends info
722 if 'blends' in pkginfo and 'responsible' in pkginfo['blends']:
723 maint = pkginfo['blends']['responsible']
724 if not 'main' in db[pkg] and maint is None:
727 info = db[pkg]['main']
728 if not 'maintainer' in info and maint is None:
731 if 'original_maintainer' in info and not info['original_maintainer'] is None:
732 maint = info['original_maintainer']
733 elif 'maintainer' in info and not info['maintainer'] is None:
734 maint = info['maintainer']
736 # no sane maintainer info
738 # safeguard: <> confuses sphinx and we don't care about different emails
739 maint = maint[:maint.find('<')].strip()
740 # kick out non-ascii ones (should not be, but too tired to find the bug)
742 codecs.ascii_decode(maint)
743 except UnicodeEncodeError:
745 if not maint.lower() in maints:
746 maints[maint.lower()] = []
747 maint_ids[maint.lower()] = [maint]
749 maint_ids[maint.lower()].append(maint)
750 maints[maint.lower()].append(pkg)
754 out[maint_ids[m][0]] = np.unique(maints[m])
758 def sort_by_release(db):
760 for pkg in db.keys():
763 if not isinstance(sec, tuple):
764 # only releases are of interest
767 if not relname in rels:
770 rels[relname].append(pkg)
773 rels[r] = np.unique(rels[r])
777 def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
779 create_dir(os.path.join(outdir, 'pkgs'))
780 create_dir(os.path.join(outdir, 'pkglists'))
781 # template for individual package listings
782 toc_template = jinja_env.get_template('pkgs_toc.rst')
783 # the high-level package list overview
784 hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
785 hltoc.write('.. _pkglists:\n\n')
786 hltoc.write(underline_text('Software packages', '='))
787 defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
788 (sort_by_release(db), 'By release', 'Packages for %s'),
789 (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
791 # TOC for each thingie
792 pkgsdict, sectitle, title_tmpl = def_
793 hltoc.write(underline_text(sectitle, '-'))
794 ids = pkgsdict.keys()
797 label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
799 filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
800 if not len(filtered_pkgs):
802 plist = toc_template.render(
804 title=underline_text(title_tmpl % id_, '='),
807 toc = codecs.open(os.path.join(outdir,
813 hltoc.write('* :ref:`%s`\n' % label)
817 # now a complete list of all packages
818 hltoc.write(underline_text('Complete list', '-'))
819 toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
821 toc.write(toc_template.render(label='full_pkg_list',
822 title=underline_text('Complete package list', '='),
823 pkgs=db.keys(), db=db))
825 hltoc.write('* :ref:`full_pkg_list`\n')
828 # and now each individual package page
829 pkg_template = jinja_env.get_template('pkg.rst')
831 page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
832 # when no page is available skip this package
835 pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
840 def prepOptParser(op):
841 # use module docstring for help output
842 op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__
844 op.add_option("--db",
845 action="store", type="string", dest="db",
847 help="Database file to read. Default: None")
849 op.add_option("--cfg",
850 action="store", type="string", dest="cfg",
852 help="Repository config file.")
854 op.add_option("-o", "--outdir",
855 action="store", type="string", dest="outdir",
857 help="Target directory for ReST output. Default: None")
859 op.add_option("-r", "--release-url",
860 action="append", dest="release_urls",
863 op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
864 type="string", default=None, help="None")
866 op.add_option("--extracts", action="store", dest="extracts_dir",
867 type="string", default=None, help="None")
871 op = OptionParser(version="%prog 0.0.2")
874 (opts, args) = op.parse_args()
877 print('There needs to be exactly one command')
883 print("'--cfg' option is mandatory.")
886 print("'--db' option is mandatory.")
890 cfg = SafeConfigParser()
893 # load existing db, unless renew is requested
894 if cmd == 'updatedb':
896 if cfg.has_option('packages', 'select taskfiles'):
897 db = add_pkgfromtaskfile(db, cfg.get('packages',
898 'select taskfiles').split())
900 # add additional package names from config file
901 if cfg.has_option('packages', 'select names'):
902 for p in cfg.get('packages', 'select names').split():
903 if not db.has_key(p):
904 db[p] = get_emptydbentry()
906 # get info from task files
907 if cfg.has_option('packages', 'prospective'):
908 for url in cfg.get('packages', 'prospective').split():
909 db = import_blendstask(cfg, db, url)
911 # parse NeuroDebian repository
912 if cfg.has_option('neurodebian', 'releases'):
913 for rurl in cfg.get('neurodebian', 'releases').split():
914 db = import_release(cfg, db, rurl)
916 # collect package information from DDE
917 db = import_dde(cfg, db)
918 # get info from NITRC
919 db = import_nitrc(cfg, db)
921 store_db(db, opts.db)
925 # load the db from file
926 db = read_db(opts.db)
929 jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))
931 # generate package pages and TOC and write them to files
932 write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)
934 write_sourceslist(jinja_env, cfg, opts.outdir)
936 if __name__ == "__main__":