#!/usr/bin/env python
"""Tell me who you are!
"""

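# Rough usage sketch, inferred from main() below. The page-generation command
# word is not actually checked -- anything other than 'updatedb' triggers page
# generation, so 'mkpages' here is only a placeholder:
#   dde.py --cfg <config> --db <dbfile> updatedb
#   dde.py --cfg <config> --db <dbfile> -o <outdir> --pkgaddenum <dir> mkpages
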
import pysvn
import json
from debian_bundle import deb822
import apt
from ConfigParser import SafeConfigParser
from optparse import OptionParser, Option, OptionGroup, OptionConflictError
import sys
import os
import shutil
import urllib2
import urllib
import codecs
import subprocess
# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter


class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)
    def get(self, url, update=False):
        """Return a file handle for the cached copy of the file at `url`.

        If the file is not in the cache (or an update is requested), it is
        downloaded (and decompressed) first.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # check whether the URL points to a compressed file
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None
        # turn the url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)
                    # take the first hit, preferring the writable cachedir
                    break

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True
        # if an update is needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download
                tempfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError("Don't know how to decompress %s files"
                                     % cext)

                if decompressor is not None:
                    # any non-zero exit status signals trouble
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        tempfile]) != 0:
                        raise RuntimeError(
                            "Something went wrong while decompressing '%s'"
                            % tempfile)

                # move decompressed file into cache
                shutil.move(os.path.splitext(tempfile)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh


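# Illustrative use of the cache, mirroring how it is used below (the URL is a
# placeholder, not a real mirror):
#   cache = AptListsCache()
#   fh = cache.get('http://example.org/debian/dists/lenny/main/binary-i386/Packages.bz2')
#   for stanza in deb822.Packages.iter_paragraphs(fh):
#       print stanza['Package']
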
def add_pkgfromtaskfile(db, urls):
    """Seed the DB with empty entries for all packages named in task files."""
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if stanza.has_key('Depends'):
                pkg = stanza['Depends']
            elif stanza.has_key('Suggests'):
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            pkgs += [p.strip() for p in pkg.split(',')]

    for p in pkgs:
        if not db.has_key(p):
            db[p] = get_emptydbentry()

    return db

def get_emptydbentry():
    return {'main': {}}

def import_blendstask(db, url):
    """Import per-package information from a blends task file."""
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out the blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]
    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
            pkg = st['Depends']
        elif st.has_key('Suggests'):
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # pkg description
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # populate the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if st.has_key('Published-Title'):
                pub = {'title': st['Published-Title']}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # create the blends record once; afterwards only append task names
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

    return db


def get_releaseinfo(rurl):
    """Parse a Release file and return basic repository metadata."""
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([label, codename])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}


def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])


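# For example (hypothetical mirror):
#   build_pkgsurl('http://example.org/debian/dists/lenny', 'main', 'i386')
# yields 'http://example.org/debian/dists/lenny/main/binary-i386/Packages.bz2'.
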
def import_release(cfg, db, rurl):
    """Import package info from all Packages files of a single release."""
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'],
                                c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db

def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Package section
    """
    pkg = st['Package']

    # only care for known packages
    if not db.has_key(pkg):
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if not st['Architecture'] in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # populate the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']

    return db


def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename


def create_dir(path):
    # create the directory, including any missing parents
    if not os.path.exists(path):
        os.makedirs(path)


def dde_get(url):
    try:
        # DDE answers JSON; the actual payload sits under the 'r' key.
        # This assumes the stdlib json module (the historical python-json
        # module offered json.read() instead); ValueError covers malformed
        # JSON responses.
        return json.loads(urllib2.urlopen(url + "?t=json").read())['r']
    except (urllib2.HTTPError, StopIteration, ValueError):
        print "NO PKG INFO AT:", url
        return False


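# Illustrative query (the base URL comes from the 'dde' config section; the
# one shown here is only a placeholder):
#   dde_get('http://example.org/dde/q/udd/packages/all/python-nifti')
# returns the decoded package record, or False if the lookup failed.
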
def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu;
            # cannot use the origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-karmic/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'."
                      % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db


def convert_longdescr(ld):
    """Convert a Debian-style long description into reST-ish text."""
    ld = ld.replace('% ', '%% ')
    ld = ld.split('\n')
    for i, l in enumerate(ld):
        if l == ' .':
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >= 3 and l[:2] == '  ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
    return ld


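# A minimal sketch of the conversion:
#   convert_longdescr(' line one\n .\n line two')
# yields u'line one \n\nline two', i.e. the ' .' paragraph separator of
# Debian control-file descriptions becomes a blank line.
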
def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
    # local binding for ease of use
    pkgdb = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if not pkgdb['main'].has_key('description'):
        return
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    underline = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (underline, title, underline)

    page = template.render(
            pkg=pkg,
            title=title,
            long_description=convert_longdescr(pkgdb['main']['long_description']),
            cfg=cfg,
            db=pkgdb,
            fulldb=db)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page


def store_db(db, filename):
    # the DB is stored as a pretty-printed python literal
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
    # eval() the literal written by store_db() -- only safe for trusted files
    db = eval(f.read())
    f.close()
    return db

def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, '_static'))

    repos = {}
    for release in cfg.options('release codenames'):
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                        release)
            lf.write('deb %s' % aptcfg)
            lf.write('deb-src %s' % aptcfg)
            lf.close()

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
    sl.close()


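# Each generated sources.list carries a deb and a deb-src line, e.g.
# (mirror URL and codename are placeholders from the config):
#   deb http://example.org/debian lenny main contrib non-free
#   deb-src http://example.org/debian lenny main contrib non-free
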
def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    toc.write(toc_template.render(pkgs=db.keys()))
    toc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir)
        # when no page is available skip this package
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="Release file URL of a repository. Can be given "
                       "multiple times.")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None,
                  help="Directory with per-package addenda as '<pkg>.rst' "
                       "files.")


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    # build a fresh db if a rebuild was requested
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if not db.has_key(p):
                    db[p] = get_emptydbentry()

        # get prospective package info from blends task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(db, url)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # store the new DB
        store_db(db, opts.db)
        # and be done
        return

    # load the db from file
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)

if __name__ == "__main__":
    main()