#!/usr/bin/env python
"""Tell me who you are!
"""

import pysvn
import json
from debian_bundle import deb822
import apt
from ConfigParser import SafeConfigParser
from optparse import OptionParser, Option, OptionGroup, OptionConflictError
import sys
import os
import shutil
import urllib2
import urllib
import codecs
import subprocess
# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter


class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)
    def get(self, url, update=False):
        """Return a file handle for the cached copy of a file, downloading
        it first if necessary.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # check whether the file is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)
                    # first hit wins -- prefer the writable cache dir
                    break

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if an update is needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download
                tempfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError(
                        "Don't know how to decompress %s files" % cext)

                if decompressor is not None:
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        tempfile]) != 0:
                        raise RuntimeError(
                            "Something went wrong while decompressing '%s'"
                            % tempfile)
                    # the decompressor stripped the compression suffix
                    tempfile = os.path.splitext(tempfile)[0]

                # move (decompressed) file into cache
                shutil.move(tempfile, cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh


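# A minimal usage sketch for AptListsCache (URL hypothetical):
#
#   cache = AptListsCache()
#   fh = cache.get('http://example.org/dists/lenny/main/binary-i386/Packages.bz2')
#   for stanza in deb822.Packages.iter_paragraphs(fh):
#       ...
#
# The compressed file is fetched, decompressed, and cached under
# build/cache/example.org_dists_lenny_main_binary-i386_Packages.

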
def add_pkgfromtaskfile(db, urls):
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if 'Depends' in stanza:
                pkg = stanza['Depends']
            elif 'Suggests' in stanza:
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            pkgs += [p.strip() for p in pkg.split(',')]

    for p in pkgs:
        if p not in db:
            db[p] = get_emptydbentry()

    return db

def get_emptydbentry():
    return {'main': {}}

def import_blendstask(db, url):
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out the blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if 'Task' in st:
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if 'Depends' in st:
            pkg = st['Depends']
        elif 'Suggests' in st:
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if p not in db:
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if 'License' in st:
                info['license'] = st['License']
            if 'Responsible' in st:
                info['responsible'] = st['Responsible']

            # pkg description
            if 'Pkg-Description' in st:
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # populate the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if 'WNPP' in st:
                db[p]['main']['debian_itp'] = st['WNPP']
            if 'Pkg-URL' in st:
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if 'Homepage' in st:
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if 'Published-Title' in st:
                pub = {'title': st['Published-Title']}
                if 'Published-Authors' in st:
                    pub['authors'] = st['Published-Authors']
                if 'Published-Year' in st:
                    pub['year'] = st['Published-Year']
                if 'Published-In' in st:
                    pub['in'] = st['Published-In']
                if 'Published-URL' in st:
                    pub['url'] = st['Published-URL']
                if 'Published-DOI' in st:
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if 'url' not in pub:
                        pub['url'] = st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if 'Registration' in st:
                print 'HAVE REGISTRATION:', p
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if 'Remark' in st:
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if 'blends' not in db[p]:
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

    return db


def get_releaseinfo(rurl):
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([label, codename])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}

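# A sketch of the dictionary get_releaseinfo() returns, for a hypothetical
# Release file (all values illustrative):
#
#   {'baseurl': 'http://neuro.debian.net/debian/dists/lenny',
#    'archs': ['i386', 'amd64'],
#    'components': ['main', 'contrib', 'non-free'],
#    'codename': 'lenny', 'label': 'NeuroDebian',
#    'labelcode': 'NeuroDebian_lenny', 'origin': 'NeuroDebian'}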

def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])

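# For example (hypothetical arguments):
#
#   build_pkgsurl('http://neuro.debian.net/debian/dists/lenny', 'main', 'i386')
#
# yields 'http://neuro.debian.net/debian/dists/lenny/main/binary-i386/Packages.bz2'.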

def import_release(cfg, db, rurl):
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'],
                                c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db

def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Package section
    """
    pkg = st['Package']

    # only care for known packages
    if pkg not in db:
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if distkey in db[pkg]:
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if st['Architecture'] not in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if 'Homepage' in st:
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # populate the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if 'Homepage' in st:
        db[pkg]['main']['homepage'] = st['Homepage']

    return db


def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename

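# This relies on a '[release codenames]' section in the repository config
# file that maps codenames to human-readable descriptions, e.g. (hypothetical
# entry):
#
#   [release codenames]
#   lenny = Debian GNU/Linux 5.0 (lenny)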

def create_dir(path):
    # roughly what os.makedirs() does, but silently accepts existing paths
    if os.path.exists(path):
        return

    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
            os.mkdir(p)


def dde_get(url):
    try:
        # DDE serves JSON when '?t=json' is appended; the payload sits under 'r'
        return json.loads(urllib2.urlopen(url + "?t=json").read())['r']
    except (urllib2.HTTPError, StopIteration):
        print "Failed to query DDE:", url
        return False

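# A typical query as issued by import_dde() below (URL hypothetical; the
# actual base comes from the 'pkgquery_url' config option):
#
#   dde_get('http://dde.debian.net/dde/q/udd/packages/all/python-nifti')
#
# which returns the decoded JSON record for the package, or False on failure.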

def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use the origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if 'popcon' in q:
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and 'popcon' in q:
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if 'popcon' in q:
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-karmic/%s" % p)
                if q and 'popcon' in q:
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'."
                      % (origin, p))

        # now get info for the package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if distkey not in info:
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found a newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db


def convert_longdescr(ld):
    ld = ld.replace('% ', '%% ')
    ld = ld.split('\n')
    for i, l in enumerate(ld):
        if l == ' .':
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >= 3 and l[:2] == '  ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
    return ld

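# Sketch of the conversion: continuation lines are joined, ' .' separator
# lines become blank-line paragraph breaks, and indented '-'/'*' lines become
# list items, e.g. (hypothetical input)
#
#   convert_longdescr(' line one\n .\n line two\n  - item')
#
# gives roughly u'line one\n\nline two\n\n- item' (modulo trailing blanks).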

def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
    # local binding for ease of use
    db = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if 'description' not in db['main']:
        return
    title = '**%s** -- %s' % (pkg, db['main']['description'])
    underline = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (underline, title, underline)

    page = template.render(
            pkg=pkg,
            title=title,
            long_description=convert_longdescr(db['main']['long_description']),
            cfg=cfg,
            db=db)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page


def store_db(db, filename):
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
    # the db file is a pretty-printed Python literal written by store_db();
    # eval() is acceptable only because the file is generated locally and trusted
    db = eval(f.read())
    return db

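# The two functions above round-trip the package database through a file,
# e.g. (filename hypothetical):
#
#   store_db(db, 'pkgs.db')
#   db = read_db('pkgs.db')
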
def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, '_static'))

    repos = {}
    for release in cfg.options('release codenames'):
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                        release)
            lf.write('deb %s' % aptcfg)
            lf.write('deb-src %s' % aptcfg)
            lf.close()

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
    sl.close()

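# Each generated sources.list contains a pair of lines like the following
# (mirror URL and codename hypothetical):
#
#   deb http://neuro.debian.net/debian lenny main contrib non-free
#   deb-src http://neuro.debian.net/debian lenny main contrib non-free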

def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    toc.write(toc_template.render(pkgs=db.keys()))
    toc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir)
        # skip this package when no page is available
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="URL of a release to import (can be given multiple times).")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None,
                  help="Directory with per-package addendum files (<pkg>.rst).")


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    # rebuild the db from scratch if requested
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if p not in db:
                    db[p] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(db, url)

        # parse the NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # store the new DB
        store_db(db, opts.db)
        # and be done
        return

    # load the db from file
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)

if __name__ == "__main__":
    main()