#!/usr/bin/env python
"""Tell me who you are!
"""

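# Rough usage sketch, inferred from main() below. The page-generation command
# word is not actually checked -- anything other than 'updatedb' triggers page
# generation, so 'mkpages' here is only a placeholder:
#   dde.py --cfg <config> --db <dbfile> updatedb
#   dde.py --cfg <config> --db <dbfile> -o <outdir> --pkgaddenum <dir> mkpages
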
import pysvn
import json
from debian_bundle import deb822
import apt
from ConfigParser import SafeConfigParser
from optparse import OptionParser, Option, OptionGroup, OptionConflictError
import sys
import os
import shutil
import urllib2
import urllib
import codecs
import subprocess
# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter


class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)
    def get(self, url, update=False):
        """Return a file handle for the cached copy of the file at `url`.

        If the file is not in the cache (or an update is requested), it is
        downloaded (and decompressed) first.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # check whether the URL points to a compressed file
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None
        # turn the url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)
                    # take the first hit, preferring the writable cachedir
                    break

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True
        # if an update is needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download
                tempfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError("Don't know how to decompress %s files"
                                     % cext)

                if decompressor is not None:
                    # any non-zero exit status signals trouble
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        tempfile]) != 0:
                        raise RuntimeError(
                            "Something went wrong while decompressing '%s'"
                            % tempfile)

                # move decompressed file into cache
                shutil.move(os.path.splitext(tempfile)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh


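# Illustrative use of the cache, mirroring how it is used below (the URL is a
# placeholder, not a real mirror):
#   cache = AptListsCache()
#   fh = cache.get('http://example.org/debian/dists/lenny/main/binary-i386/Packages.bz2')
#   for stanza in deb822.Packages.iter_paragraphs(fh):
#       print stanza['Package']
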
def add_pkgfromtaskfile(db, urls):
    """Seed the DB with empty entries for all packages named in task files."""
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if stanza.has_key('Depends'):
                pkg = stanza['Depends']
            elif stanza.has_key('Suggests'):
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            pkgs += [p.strip() for p in pkg.split(',')]

    for p in pkgs:
        if not db.has_key(p):
            db[p] = get_emptydbentry()

    return db

def get_emptydbentry():
    return {'main': {}}

def import_blendstask(db, url):
    """Import per-package information from a blends task file."""
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out the blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]
    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
            pkg = st['Depends']
        elif st.has_key('Suggests'):
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # pkg description
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # populate the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if st.has_key('Published-Title'):
                pub = {'title': st['Published-Title']}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # create the blends record once; afterwards only append task names
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

    return db


def get_releaseinfo(rurl):
    """Parse a Release file and return basic repository metadata."""
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([label, codename])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}


def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])


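# For example (hypothetical mirror):
#   build_pkgsurl('http://example.org/debian/dists/lenny', 'main', 'i386')
# yields 'http://example.org/debian/dists/lenny/main/binary-i386/Packages.bz2'.
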
def import_release(cfg, db, rurl):
    """Import package info from all Packages files of a single release."""
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'],
                                c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db

def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Package section
    """
    pkg = st['Package']

    # only care for known packages
    if not db.has_key(pkg):
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if not st['Architecture'] in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # populate the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']

    return db


def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename


def create_dir(path):
    # create the directory, including any missing parents
    if not os.path.exists(path):
        os.makedirs(path)


def dde_get(url):
    try:
        # DDE answers JSON; the actual payload sits under the 'r' key.
        # This assumes the stdlib json module (the historical python-json
        # module offered json.read() instead); ValueError covers malformed
        # JSON responses.
        return json.loads(urllib2.urlopen(url + "?t=json").read())['r']
    except (urllib2.HTTPError, StopIteration, ValueError):
        print "NO PKG INFO AT:", url
        return False


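# Illustrative query (the base URL comes from the 'dde' config section; the
# one shown here is only a placeholder):
#   dde_get('http://example.org/dde/q/udd/packages/all/python-nifti')
# returns the decoded package record, or False if the lookup failed.
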
def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu;
            # cannot use the origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-karmic/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'."
                      % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db


def convert_longdescr(ld):
    """Convert a Debian-style long description into reST-ish text."""
    ld = ld.replace('% ', '%% ')
    ld = ld.split('\n')
    for i, l in enumerate(ld):
        if l == ' .':
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >= 3 and l[:2] == '  ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
    return ld


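# A minimal sketch of the conversion:
#   convert_longdescr(' line one\n .\n line two')
# yields u'line one \n\nline two', i.e. the ' .' paragraph separator of
# Debian control-file descriptions becomes a blank line.
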
def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
    # local binding for ease of use
    pkgdb = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if not pkgdb['main'].has_key('description'):
        return
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    underline = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (underline, title, underline)

    page = template.render(
            pkg=pkg,
            title=title,
            long_description=convert_longdescr(pkgdb['main']['long_description']),
            cfg=cfg,
            db=pkgdb,
            fulldb=db)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page


def store_db(db, filename):
    # the DB is stored as a pretty-printed python literal
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
    # eval() the literal written by store_db() -- only safe for trusted files
    db = eval(f.read())
    f.close()
    return db

def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, '_static'))

    repos = {}
    for release in cfg.options('release codenames'):
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                        release)
            lf.write('deb %s' % aptcfg)
            lf.write('deb-src %s' % aptcfg)
            lf.close()

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
    sl.close()


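# Each generated sources.list carries a deb and a deb-src line, e.g.
# (mirror URL and codename are placeholders from the config):
#   deb http://example.org/debian lenny main contrib non-free
#   deb-src http://example.org/debian lenny main contrib non-free
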
def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    toc.write(toc_template.render(pkgs=db.keys()))
    toc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir)
        # when no page is available skip this package
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="Release file URL of a repository. Can be given "
                       "multiple times.")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None,
                  help="Directory with per-package addenda as '<pkg>.rst' "
                       "files.")


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    # build a fresh db if a rebuild was requested
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if not db.has_key(p):
                    db[p] = get_emptydbentry()

        # get prospective package info from blends task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(db, url)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # store the new DB
        store_db(db, opts.db)
        # and be done
        return

    # load the db from file
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)

if __name__ == "__main__":
    main()