#!/usr/bin/env python
"""Tell me who you are!
"""

import pysvn
import json
import numpy as np

from ConfigParser import SafeConfigParser
from optparse import OptionParser, OptionGroup, OptionConflictError

# Let's first silence harmless (but annoying) warnings
import warnings
warnings.simplefilter('ignore', FutureWarning)
warnings.filterwarnings('ignore',
                        'Module debian_bundle was already imported.*', UserWarning)

from debian import deb822
import apt                              # initializes the "_system" ;)
from apt_pkg import version_compare

import sys
import os
import copy
import shutil
import urllib2
import urllib
import codecs
import subprocess
import time
import re

# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter


class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Look up the file in the cache and return the cached copy,
        downloading it first if necessary.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handle
        """
        # look whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if update needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download
                tmpfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError(
                        "Don't know how to decompress %s files" % cext)

                if decompressor is not None:
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        tmpfile]) != 0:
                        raise RuntimeError(
                            "Something went wrong while decompressing '%s'"
                            % tmpfile)

                # move decompressed file into cache
                shutil.move(os.path.splitext(tmpfile)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh

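# A minimal usage sketch (hypothetical URL; network access assumed), kept as
# a comment since this module executes on import:
#
#   cache = AptListsCache()
#   fh = cache.get('http://ftp.debian.org/debian/dists/sid/main/binary-i386/Packages.bz2')
#   for stanza in deb822.Packages.iter_paragraphs(fh):
#       print stanza['Package']
#   fh.close()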

def add_pkgfromtaskfile(db, urls):
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if 'Depends' in stanza:
                pkg = stanza['Depends']
            elif 'Recommends' in stanza:
                pkg = stanza['Recommends']
            elif 'Suggests' in stanza:
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            pkgs += [p.strip() for p in pkg.split(',')]

    for p in pkgs:
        if p not in db:
            db[p] = get_emptydbentry()

    return db

def get_emptydbentry():
    return {'main': {}}

def import_blendstask(cfg, db, url):
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out the blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if 'Task' in st:
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if 'Depends' in st:
            pkg = st['Depends']
        elif 'Recommends' in st:
            pkg = st['Recommends']
        elif 'Suggests' in st:
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if p not in db:
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if 'License' in st:
                info['license'] = st['License']
            if 'Responsible' in st:
                info['responsible'] = st['Responsible']

            # pkg description
            if 'Pkg-Description' in st:
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # charge the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if 'WNPP' in st:
                db[p]['main']['debian_itp'] = st['WNPP']
            if 'Pkg-URL' in st:
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if 'Homepage' in st:
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if 'Published-Title' in st:
                title = st['Published-Title']
                if title[-1] == '.':
                    # strip the trailing dot -- it is added back later
                    pub = {'title': title[:-1]}
                else:
                    pub = {'title': title}
                if 'Published-Authors' in st:
                    pub['authors'] = st['Published-Authors']
                if 'Published-Year' in st:
                    pub['year'] = st['Published-Year']
                if 'Published-In' in st:
                    pub['in'] = st['Published-In']
                if 'Published-URL' in st:
                    pub['url'] = st['Published-URL']
                if 'Published-DOI' in st:
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if 'url' not in pub:
                        pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if 'Registration' in st:
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if 'Remark' in st:
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if 'blends' not in db[p]:
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

            # handle pkg name aliases
            if p in cfg.options('blend package aliases'):
                src_entry = db[p].copy()
                # remove original entry
                del db[p]
                # copy the entry into all aliases
                for alias in cfg.get('blend package aliases', p).split():
                    print "Aliasing %s to %s" % (p, alias)
                    db[alias] = copy.deepcopy(src_entry)

    return db


def get_releaseinfo(rurl):
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([label, codename])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}

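# Sketch of the returned mapping for a hypothetical NeuroDebian Release file
# (all values illustrative):
#
#   {'baseurl': 'http://neuro.debian.net/debian/dists/sid',
#    'archs': ['amd64', 'i386'],
#    'components': ['main', 'contrib', 'non-free'],
#    'codename': 'sid', 'label': 'NeuroDebian', 'origin': 'NeuroDebian',
#    'labelcode': 'NeuroDebian_sid'}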

def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])

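# For example (hypothetical arguments):
#   build_pkgsurl('http://neuro.debian.net/debian/dists/sid', 'main', 'i386')
#   -> 'http://neuro.debian.net/debian/dists/sid/main/binary-i386/Packages.bz2'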

def import_release(cfg, db, rurl):
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db

def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Package section
    """
    pkg = st['Package']

    # only care for known packages
    if pkg not in db:
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if distkey in db[pkg]:
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if st['Architecture'] not in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if 'Homepage' in st:
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # charge the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if 'Source' in st:
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if 'Homepage' in st:
        db[pkg]['main']['homepage'] = st['Homepage']
    if 'Recommends' in st:
        db[pkg]['main']['recommends'] = st['Recommends']

    return db


def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename

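# For example, given a config section like (hypothetical values)
#
#   [release codenames]
#   sid = Debian testing (sid)
#
# trans_codename('sid', cfg) returns 'Debian testing (sid)', while any
# unlisted codename is passed through unchanged.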

def create_dir(path):
    # essentially a guarded os.makedirs(): create all missing path components
    if os.path.exists(path):
        return

    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        # skip the empty leading component of absolute paths
        if p and not os.path.exists(p):
            os.mkdir(p)


def dde_get(url, fail=False):
    # enforce delay to be friendly to DDE
    time.sleep(3)
    try:
        data = json.load(urllib2.urlopen(url + "?t=json"))['r']
        print "SUCCESS:", url
        return data
    except urllib2.HTTPError as e:
        print "NOINFO:", url, type(e)
        return False
    except urllib2.URLError as e:
        print "URLERROR:", url, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return dde_get(url, fail=True)
    except StopIteration:
        print "NOINFO:", url
        return False
    except Exception as e:
        print "UDD-DOWN?:", url, type(e)
        return False

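# A usage sketch (hypothetical query URL -- in practice the base URL comes
# from the 'dde' section of the config, see import_dde() below):
#
#   q = dde_get('http://dde.debian.net/dde/q/udd/packages/all/python-nifti')
#   if q:
#       print q.get('popcon')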

def nitrc_get(spec, fail=False):
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    try:
        # change into this from python 2.6 on
        #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        data = json.load(urllib2.urlopen(nitrc_url + '?spec=%s' % spec))
        print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError as e:
        print "NITRC-NOINFO:", spec, type(e)
        return False
    except urllib2.URLError as e:
        print "NITRC-URLERROR:", spec, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return nitrc_get(spec, fail=True)
    return data


def parse_nitrc(data):
    if data is False:
        return None
    # simplify -- there is only one project in the data
    project = data['projects'][0]
    nitrc_filtered = {'downloads': 0,
                      'id': project['id']}
    for pkg in project['packages']:
        for release in pkg['releases']:
            for f in release['files']:
                nitrc_filtered['downloads'] += f['download_count']
    return nitrc_filtered

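# Returns a reduced record such as {'downloads': 1234, 'id': 42} (values
# hypothetical): the NITRC project id plus the download count summed over
# all files of all releases of all packages.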

def import_nitrc(cfg, db):
    for p in db.keys():
        if not cfg.has_option("nitrc ids", p):
            continue
        nitrc_spec = cfg.get("nitrc ids", p)
        nitrc_data = nitrc_get(nitrc_spec)
        nitrc_excerpt = parse_nitrc(nitrc_data)
        if nitrc_excerpt is not None:
            db[p]['nitrc'] = nitrc_excerpt
    return db


def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use the origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if 'popcon' in q:
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and 'popcon' in q:
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if 'popcon' in q:
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-precise/%s" % p)
                if q and 'popcon' in q:
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'."
                      % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if distkey not in info:
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = version_compare(cp['version'],
                                       info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db

def assure_unicode(s):
    """Assure that the argument is unicode

    Necessary if strings do not carry the Pythonish 'u' prefix to
    signal unicode strings, but are in fact UTF-8 encoded
    """
    if isinstance(s, unicode):
        return s
    elif isinstance(s, str):
        # attempt a regular unicode() call and if it fails -- decode
        # as UTF-8
        try:
            return unicode(s)
        except UnicodeDecodeError:
            return s.decode('utf8')
    else:
        return assure_unicode(str(s))

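# For example:
#   assure_unicode(u'caf\xe9')     -> u'caf\xe9'  (returned as is)
#   assure_unicode('caf\xc3\xa9')  -> u'caf\xe9'  (decoded from UTF-8)
#   assure_unicode(42)             -> u'42'       (stringified first)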

def convert_longdescr(ld):
    """Convert a Debian-style long description into something reST-like.

    yoh: I think all this long description conversion will keep giving
    us problems, since per se there are no strict regulations,
    especially in blends files
    """
    descr = u''
    ld = ld.replace('% ', '%% ')
    ld = ld.replace(r'\t', '    ') # just in case, assuming a tab is 4 spaces
    ld = ld.split('\n')
    re_leadblanks = re.compile("^ *")
    re_itemized = re.compile("^[o*-+] +")
    re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$")
    re_description_gr = re.compile("^( *[^-]+ - )(.*?)$")

    def unwrap_lines(lines):
        out = []
        indent_levels = [-1]
        for l in lines:
            match = re_itemized_gr.search(l).groups()
            if ((len(match[0]) in indent_levels and match[1] is None)
                or (len(match[0]) > max(indent_levels)+4)) \
                and match[2].strip() != '.':
                # append to previous
                if not out[-1].endswith(" "):
                    out[-1] += " "
                out[-1] += match[2]
            else:
                out.append(l)

            indent_levels = [len(match[0])]
            if match[1] is not None:
                indent_levels += [len(match[0]) + len(match[1])]
            if match[2].strip() == '.':
                # reset though if '.'
                indent_levels = [-1]
        return out

    def dedent_withlevel(lines):
        """Dedent `lines` given in a list; return the dedented lines
        and how much was dedented
        """
        nleading = min([re_leadblanks.search(l).span()[1]
                        for l in lines])
        return [l[nleading:] for l in lines], nleading

    def block_lines(ld, level=0):
        # so we got a list of lines
        # dedent all of them first
        ld, level = dedent_withlevel(ld)

        # let's collect them in blocks/paragraphs
        # 1. into paragraphs split by '.'
        blocks, block = [], None

        # next block can begin if
        #  1. a '.' line
        #  2. it was an itemized list and all items begin with
        #     the same symbol or get further indented accordingly
        #     so let's first check if it is an itemized list
        itemized_match = re_itemized.search(ld[0])
        if itemized_match:
            allow_indents = " "*itemized_match.span()[1]
        else:
            allow_indents = None
        for l in ld:
            if block is None or l.strip() == '.' \
                   or (len(l) and ( len(block) and (
                (l.startswith(' ') and not block[-1].startswith(' '))
                or
                (not l.startswith(' ') and block[-1].startswith(' '))))):
                block = []
                blocks.append(block)
            if l.strip() != '.':
                block.append(l)
        if len(blocks) == 1:
            return blocks[0]
        else:
            return [block_lines(b, level+1) for b in blocks if len(b)]

    def blocks_to_rst(bls, level=0):
        # check if this block is an itemized beast
        #itemized_match = re_itemized_gr.search(bls[0][0])
        #if itemized_match:
        #    res += ' 'allow_indents = " "*itemized_match.span()[1]
        out = ''
        for b in bls:
            if isinstance(b, list):
                if len(b) == 1:
                    out += " "*level + b[0] + '\n\n'
                else:
                    out += blocks_to_rst(b, level+1)
            else:
                e = " "*level + b + '\n'
                if not re_itemized.search(b):
                    pass
                    #e += '\n'
                elif len(e) and e[0] == ' ':
                    # strip 1 leading blank
                    e = e[1:]
                out += e
        out += '\n'
        return out

    ld = unwrap_lines(ld)
    bls = block_lines(ld)
    return blocks_to_rst(bls)

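# A rough illustration (input hypothetical, leading whitespace matters):
#
#   convert_longdescr(" Toolkit doing things.\n"
#                     " Features:\n"
#                     "  - fast\n"
#                     "  - free\n")
#
# yields reST-ish text in which wrapped lines are joined back together and
# blocks (paragraphs, itemized lists) end up separated by blank lines, as
# expected by the page templates; exact output depends on the heuristics
# above.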

def underline_text(text, symbol):
    underline = symbol * len(text)
    return '%s\n%s\n' % (text, underline)

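# e.g. underline_text('Packages', '=') -> 'Packages\n========\n'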

def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
    # local binding for ease of use
    pkgdb = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if 'description' not in pkgdb['main']:
        return
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    title = underline_text(title, '*')

    ex_dir = None
    if 'sv' in pkgdb['main']:
        ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
        if not os.path.exists(ex_dir):
            ex_dir = None
    long_description = 'Description missing'
    if 'long_description' in pkgdb['main']:
        long_description = convert_longdescr(
                    assure_unicode(pkgdb['main']['long_description']))
    page = template.render(
            pkg=pkg,
            title=title,
            long_description=long_description,
            cfg=cfg,
            db=pkgdb,
            fulldb=db,
            extracts_dir=ex_dir,
            op=os.path)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page


def store_db(db, filename):
    # persist the db as a pretty-printed Python literal
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    # counterpart of store_db() -- eval()s the stored literal, so only
    # load db files from a trusted source
    f = codecs.open(filename, 'r', 'utf-8')
    db = eval(f.read())
    return db

def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'lists'))

    repos = {}
    for release in cfg.options('release codenames'):
        if release == 'data':
            # no separate list for the data archive
            continue
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = '%s.%s' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, 'lists', listname), 'w')
            for rel in ('data', release):
                aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                            rel)
                lf.write('deb %s' % aptcfg)
                lf.write('#deb-src %s' % aptcfg)
            lf.close()

    id2codename = dict([(cfg.get('release backport ids', r), r)
                            for r in cfg.options('release codenames')])
    id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
                            for r in cfg.options('release codenames')])
    mirror2name = dict([(m, cfg.get('mirror names', m))
                            for m in cfg.options('mirrors')])
    mirror2url = dict([(m, cfg.get('mirrors', m))
                            for m in cfg.options('mirrors')])
    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(id2codename=id2codename,
                                     id2relname=id2relname,
                                     mirror2name=mirror2name,
                                     mirror2url=mirror2url))
    sl.close()


def write_mirmonlists(cfg, outdir):
    """Write the list of mirrors in a format suitable for mirmon

    It will reuse the same 'lists' directory
    """
    print "I: Composing mirmon lists"
    outdir = os.path.join(outdir, 'lists')
    create_dir(outdir)

    for sec, sep in (('mirrors', ' '),
                     ('mirror names', ' - ')):
        entries = ['%s%s%s' % (mirror, sep, cfg.get(sec, mirror))
                   for mirror in cfg.options('mirrors')]
        f = open(os.path.join(outdir, 'mirmon-%s.txt' % sec.replace(' ', '-')),
                 'w')
        f.write('\n'.join(entries + ['']))
        f.close()


def sort_by_tasks(db):
    tasks = {}
    for pkg in db.keys():
        if 'blends' not in db[pkg]:
            # no blend info
            continue
        blendinfo = db[pkg]['blends']
        if 'tasks' not in blendinfo:
            # no task info in blend data
            continue
        taskinfo = blendinfo['tasks']
        for task in taskinfo:
            taskname = task[1]
            if taskname not in tasks:
                tasks[taskname] = []
            tasks[taskname].append(pkg)
    return tasks

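# Returns e.g. {'electrophys': ['pkg1', 'pkg2'], ...} (names hypothetical):
# a mapping from task names (the second element of each task tuple) to the
# packages advertising them.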

def sort_by_maintainer(db):
    maints = {}
    maint_ids = {}
    for pkg in db.keys():
        maint = None
        pkginfo = db[pkg]
        # start with the blends info
        if 'blends' in pkginfo and 'responsible' in pkginfo['blends']:
            maint = pkginfo['blends']['responsible']
        if 'main' not in db[pkg] and maint is None:
            # no info
            continue
        info = db[pkg].get('main', {})
        if 'maintainer' not in info and maint is None:
            # no maintainer info
            continue
        if 'original_maintainer' in info and info['original_maintainer'] is not None:
            maint = info['original_maintainer']
        elif 'maintainer' in info and info['maintainer'] is not None:
            maint = info['maintainer']
        if maint is None:
            # no sane maintainer info
            continue
        # safeguard: <> confuses sphinx and we don't care about different emails
        if '<' in maint:
            maint = maint[:maint.find('<')]
        maint = maint.strip()
        # kick out non-ascii ones (should not be, but too tired to find the bug)
        try:
            codecs.ascii_decode(maint)
        except (UnicodeDecodeError, UnicodeEncodeError):
            continue
        if maint.lower() not in maints:
            maints[maint.lower()] = []
            maint_ids[maint.lower()] = [maint]
        else:
            maint_ids[maint.lower()].append(maint)
        maints[maint.lower()].append(pkg)
    # remove duplicates
    out = {}
    for m in maints:
        out[maint_ids[m][0]] = np.unique(maints[m])
    return out


def sort_by_release(db):
    rels = {}
    for pkg in db.keys():
        pkginfo = db[pkg]
        for sec in pkginfo:
            if not isinstance(sec, tuple):
                # only releases are of interest
                continue
            relname = sec[0]
            if relname not in rels:
                rels[relname] = []
            rels[relname].append(pkg)
    # remove duplicates
    for r in rels:
        rels[r] = np.unique(rels[r])
    return rels


def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))
    create_dir(os.path.join(outdir, 'pkglists'))
    # template for individual package listings
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    # the high-level package list overview
    hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    hltoc.write('.. _pkglists:\n\n')
    hltoc.write(underline_text('Software packages', '='))
    defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
            (sort_by_release(db), 'By release', 'Packages for %s'),
            (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
    for def_ in defs:
        # TOC for each thingie
        pkgsdict, sectitle, title_tmpl = def_
        hltoc.write(underline_text(sectitle, '-'))
        ids = sorted(pkgsdict.keys())
        for id_ in ids:
            label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
            # filter out crap
            filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
            if not len(filtered_pkgs):
                continue
            plist = toc_template.render(
                        label=label,
                        title=underline_text(title_tmpl % id_, '='),
                        pkgs=filtered_pkgs,
                        db=db)
            toc = codecs.open(os.path.join(outdir,
                                           'pkglists',
                                           '%s.rst' % label),
                              'w', 'utf-8')
            toc.write(plist)
            toc.close()
            hltoc.write('* :ref:`%s`\n' % label)
        hltoc.write('\n\n')

    # now a complete list of all packages
    hltoc.write(underline_text('Complete list', '-'))
    toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
                      'w', 'utf-8')
    toc.write(toc_template.render(label='full_pkg_list',
                title=underline_text('Complete package list', '='),
                pkgs=db.keys(), db=db))
    toc.close()
    hltoc.write('* :ref:`full_pkg_list`\n')
    hltoc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
        # when no page is available skip this package
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="URL of a repository Release file; can be given "
                       "multiple times.")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None,
                  help="Directory with per-package addendum (<pkg>.rst) files.")

    op.add_option("--extracts", action="store", dest="extracts_dir",
                  type="string", default=None,
                  help="Directory with per-source-package extracts.")


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    if cmd == 'debug_ld':
        # load the db from file
        db = read_db(opts.db)

        for p in db.keys():
        #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']:
            if 'long_description' not in db[p]['main']:
                continue
            ld = db[p]['main']['long_description']

            print ">>>>>>>>> ", p
            #print ld
            print "----"
            print convert_longdescr(ld)
        raise SystemExit

    # rebuild the db from scratch if requested
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if p not in db:
                    db[p] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(cfg, db, url)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # get info from NITRC
        db = import_nitrc(cfg, db)
        # store the new DB
        store_db(db, opts.db)
        # and be done
        return

    # otherwise load the existing db from file
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)

    write_mirmonlists(cfg, opts.outdir)

if __name__ == "__main__":
    main()