#!/usr/bin/env python
"""Tell me who you are!
"""

import pysvn
import json
from debian_bundle import deb822
import apt
from ConfigParser import SafeConfigParser
from optparse import OptionParser, Option, OptionGroup, OptionConflictError
import sys
import os
import shutil
import urllib2
import urllib
import codecs
import subprocess
# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter


class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)
    def get(self, url, update=False):
        """Return a file handle for the cached copy of a file, downloading
        it first if necessary.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # check whether the file is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)
                    # first hit wins -- prefer the writable cache dir
                    break

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if an update is needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download
                tempfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError(
                        "Don't know how to decompress %s files" % cext)

                if decompressor is not None:
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        tempfile]) != 0:
                        raise RuntimeError(
                            "Something went wrong while decompressing '%s'"
                            % tempfile)
                    # the decompressor stripped the compression suffix
                    tempfile = os.path.splitext(tempfile)[0]

                # move (decompressed) file into cache
                shutil.move(tempfile, cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh


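# A minimal usage sketch for AptListsCache (URL hypothetical):
#
#   cache = AptListsCache()
#   fh = cache.get('http://example.org/dists/lenny/main/binary-i386/Packages.bz2')
#   for stanza in deb822.Packages.iter_paragraphs(fh):
#       ...
#
# The compressed file is fetched, decompressed, and cached under
# build/cache/example.org_dists_lenny_main_binary-i386_Packages.

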
def add_pkgfromtaskfile(db, urls):
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if 'Depends' in stanza:
                pkg = stanza['Depends']
            elif 'Suggests' in stanza:
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            pkgs += [p.strip() for p in pkg.split(',')]

    for p in pkgs:
        if p not in db:
            db[p] = get_emptydbentry()

    return db

def get_emptydbentry():
    return {'main': {}}

def import_blendstask(db, url):
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out the blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if 'Task' in st:
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if 'Depends' in st:
            pkg = st['Depends']
        elif 'Suggests' in st:
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if p not in db:
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if 'License' in st:
                info['license'] = st['License']
            if 'Responsible' in st:
                info['responsible'] = st['Responsible']

            # pkg description
            if 'Pkg-Description' in st:
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # populate the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if 'WNPP' in st:
                db[p]['main']['debian_itp'] = st['WNPP']
            if 'Pkg-URL' in st:
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if 'Homepage' in st:
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if 'Published-Title' in st:
                pub = {'title': st['Published-Title']}
                if 'Published-Authors' in st:
                    pub['authors'] = st['Published-Authors']
                if 'Published-Year' in st:
                    pub['year'] = st['Published-Year']
                if 'Published-In' in st:
                    pub['in'] = st['Published-In']
                if 'Published-URL' in st:
                    pub['url'] = st['Published-URL']
                if 'Published-DOI' in st:
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if 'url' not in pub:
                        pub['url'] = st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if 'Registration' in st:
                print 'HAVE REGISTRATION:', p
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if 'Remark' in st:
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if 'blends' not in db[p]:
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

    return db


def get_releaseinfo(rurl):
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([label, codename])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}

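# A sketch of the dictionary get_releaseinfo() returns, for a hypothetical
# Release file (all values illustrative):
#
#   {'baseurl': 'http://neuro.debian.net/debian/dists/lenny',
#    'archs': ['i386', 'amd64'],
#    'components': ['main', 'contrib', 'non-free'],
#    'codename': 'lenny', 'label': 'NeuroDebian',
#    'labelcode': 'NeuroDebian_lenny', 'origin': 'NeuroDebian'}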

def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])

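# For example (hypothetical arguments):
#
#   build_pkgsurl('http://neuro.debian.net/debian/dists/lenny', 'main', 'i386')
#
# yields 'http://neuro.debian.net/debian/dists/lenny/main/binary-i386/Packages.bz2'.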

def import_release(cfg, db, rurl):
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'],
                                c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db

def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Package section
    """
    pkg = st['Package']

    # only care for known packages
    if pkg not in db:
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if distkey in db[pkg]:
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if st['Architecture'] not in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if 'Homepage' in st:
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # populate the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if 'Homepage' in st:
        db[pkg]['main']['homepage'] = st['Homepage']

    return db


def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename

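# This relies on a '[release codenames]' section in the repository config
# file that maps codenames to human-readable descriptions, e.g. (hypothetical
# entry):
#
#   [release codenames]
#   lenny = Debian GNU/Linux 5.0 (lenny)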

def create_dir(path):
    # roughly what os.makedirs() does, but silently accepts existing paths
    if os.path.exists(path):
        return

    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
            os.mkdir(p)


def dde_get(url):
    try:
        # DDE serves JSON when '?t=json' is appended; the payload sits under 'r'
        return json.loads(urllib2.urlopen(url + "?t=json").read())['r']
    except (urllib2.HTTPError, StopIteration):
        print "Failed to query DDE:", url
        return False

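# A typical query as issued by import_dde() below (URL hypothetical; the
# actual base comes from the 'pkgquery_url' config option):
#
#   dde_get('http://dde.debian.net/dde/q/udd/packages/all/python-nifti')
#
# which returns the decoded JSON record for the package, or False on failure.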

def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use the origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if 'popcon' in q:
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and 'popcon' in q:
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if 'popcon' in q:
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-karmic/%s" % p)
                if q and 'popcon' in q:
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'."
                      % (origin, p))

        # now get info for the package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if distkey not in info:
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found a newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db


def convert_longdescr(ld):
    ld = ld.replace('% ', '%% ')
    ld = ld.split('\n')
    for i, l in enumerate(ld):
        if l == ' .':
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >= 3 and l[:2] == '  ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
    return ld

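# Sketch of the conversion: continuation lines are joined, ' .' separator
# lines become blank-line paragraph breaks, and indented '-'/'*' lines become
# list items, e.g. (hypothetical input)
#
#   convert_longdescr(' line one\n .\n line two\n  - item')
#
# gives roughly u'line one\n\nline two\n\n- item' (modulo trailing blanks).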

def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
    # local binding for ease of use
    db = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if 'description' not in db['main']:
        return
    title = '**%s** -- %s' % (pkg, db['main']['description'])
    underline = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (underline, title, underline)

    page = template.render(
            pkg=pkg,
            title=title,
            long_description=convert_longdescr(db['main']['long_description']),
            cfg=cfg,
            db=db)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page


def store_db(db, filename):
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
    # the db file is a pretty-printed Python literal written by store_db();
    # eval() is acceptable only because the file is generated locally and trusted
    db = eval(f.read())
    return db

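# The two functions above round-trip the package database through a file,
# e.g. (filename hypothetical):
#
#   store_db(db, 'pkgs.db')
#   db = read_db('pkgs.db')
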
def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, '_static'))

    repos = {}
    for release in cfg.options('release codenames'):
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                        release)
            lf.write('deb %s' % aptcfg)
            lf.write('deb-src %s' % aptcfg)
            lf.close()

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
    sl.close()

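# Each generated sources.list contains a pair of lines like the following
# (mirror URL and codename hypothetical):
#
#   deb http://neuro.debian.net/debian lenny main contrib non-free
#   deb-src http://neuro.debian.net/debian lenny main contrib non-free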

def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    toc.write(toc_template.render(pkgs=db.keys()))
    toc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir)
        # skip this package when no page is available
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="URL of a release to import (can be given multiple times).")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None,
                  help="Directory with per-package addendum files (<pkg>.rst).")


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    # rebuild the db from scratch if requested
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if p not in db:
                    db[p] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(db, url)

        # parse the NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # store the new DB
        store_db(db, opts.db)
        # and be done
        return

    # load the db from file
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)

if __name__ == "__main__":
    main()