]> git.donarmstrong.com Git - neurodebian.git/blob - neurodebian/dde.py
729b7997c3e9b830cb5cbc7e042306d45e01cab3
[neurodebian.git] / neurodebian / dde.py
1 #!/usr/bin/env python
2 """Tell me who you are!
3 """
4
5 import pysvn
6 import json
7 from debian_bundle import deb822
8 import apt
9 from ConfigParser import SafeConfigParser
10 from optparse import OptionParser, Option, OptionGroup, OptionConflictError
11 import sys
12 import os
13 import shutil
14 import urllib2
15 import urllib
16 import codecs
17 import subprocess
18 import time
19 # templating
20 from jinja2 import Environment, PackageLoader
21
22 from pprint import PrettyPrinter
23
24
class AptListsCache(object):
    """File cache for APT repository metadata (Packages/Release files).

    Cached files are stored under `cachedir` with a filename derived from
    their URL, mimicking the naming scheme of /var/lib/apt/lists/.
    Additional read-only cache directories can be searched before a
    download is attempted.
    """
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        # NOTE: init_db is currently unused; kept for interface compatibility
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Looks in the cache if the file is there and takes the cached one.
        Otherwise it is downloaded first.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # look whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            # strip the compression extension to obtain the target URL
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimik what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if update needed -- download
        if update:
            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download into a temporary location
                # (renamed from `tempfile` which shadowed the stdlib module)
                tmpfile, ignored = urllib.urlretrieve(url)

                # pick a decompressor matching the compression extension
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError(
                        "Don't know how to decompress %s files" % cext)

                if decompressor is not None:
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        tmpfile]) == 1:
                        raise RuntimeError(
                            "Something went wrong while decompressing '%s'"
                            % tmpfile)

                # move decompressed file into cache
                shutil.move(os.path.splitext(tmpfile)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh
118
119
def add_pkgfromtaskfile(db, urls):
    """Seed `db` with empty entries for all packages named in task files.

    Package candidates are taken from the 'Depends' or 'Suggests' field
    of every stanza in each task file.

    :Parameters:
      db: package database dict to extend
      urls: sequence of task file URLs to parse
    :Return:
      the updated db
    """
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            # prefer hard dependencies, fall back to suggestions
            if 'Depends' in stanza:
                pkg = stanza['Depends']
            elif 'Suggests' in stanza:
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line (split handles the
            # single-package case too, so no special-casing needed)
            pkgs += [p.strip() for p in pkg.split(',')]

    for p in pkgs:
        if p not in db:
            db[p] = get_emptydbentry()

    return db
147
def get_emptydbentry():
    """Return a fresh, minimal db entry with an empty 'main' section."""
    return dict(main={})
150
def import_blendstask(db, url):
    """Import per-package metadata from a Debian Pure Blends task file.

    Adds blend/task membership, license, responsible maintainer,
    description, publication, registration and remark information to
    packages already present in `db`; unknown packages are skipped
    with a message.

    :Parameters:
      db: package database dict to update
      url: URL of the blend's task file
    :Return:
      the updated db
    """
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        # a 'Task' stanza opens a new task; `task` then persists for all
        # following package stanzas of that task
        # NOTE(review): this assumes the first package stanza is always
        # preceded by a 'Task' stanza -- otherwise `task` below would be
        # undefined (NameError); confirm against real task files
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
            pkg = st['Depends']
        elif st.has_key('Suggests'):
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # pkg description
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # charge the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if st.has_key('Published-Title'):
                pub = {'title': st['Published-Title']}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
            else:
                # just add this tasks name and id
                db[p]['blends']['tasks'].append(task)

    return db
250
251
def get_releaseinfo(rurl):
    """Fetch (via the cache) and summarize a repository Release file.

    :Return:
      dict with baseurl, archs, components, codename, label, origin and
      a combined 'labelcode' that distinguishes official from unofficial
      repositories.
    """
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # parse it
    rp = deb822.Release(release_file)
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']

    info = {
        'baseurl': baseurl,
        # architectures/components available in this dist
        'archs': rp['Architectures'].split(),
        'components': rp['Components'].split(),
        'codename': codename,
        'label': label,
        # combine label and codename to distinguish between official
        # and unofficial repos
        'labelcode': '_'.join([label, codename]),
        'origin': origin,
    }

    # cleanup
    release_file.close()

    return info
278
279
def build_pkgsurl(baseurl, component, arch):
    """Assemble the URL of the bzip2-compressed Packages file for one
    component/architecture of a repository."""
    return '%s/%s/binary-%s/Packages.bz2' % (baseurl, component, arch)
282
283
def import_release(cfg, db, rurl):
    """Parse all Packages files of one repository release into `db`."""
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # walk every component/architecture combination of this release
    for component in ri['components']:
        for arch in ri['archs']:
            pkgsurl = build_pkgsurl(ri['baseurl'], component, arch)

            # retrieve from cache and process stanza by stanza
            packages_file = cache.get(pkgsurl)
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'],
                                ri['codename'], component, ri['baseurl'])
            packages_file.close()

    return db
306
307 def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
308     """
309     :Parameter:
310       st: Package section
311     """
312     pkg = st['Package']
313
314     # only care for known packages
315     if not db.has_key(pkg):
316 #        print 'Ignoring NeuroDebian package "%s"' % pkg
317         return db
318
319     distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)
320
321     if db[pkg].has_key(distkey):
322         info = db[pkg][distkey]
323     else:
324         info = {'architecture': []}
325
326     # fill in data
327     if not st['Architecture'] in info['architecture']:
328         info['architecture'].append(st['Architecture'])
329     info['maintainer'] = st['Maintainer']
330     if st.has_key('Homepage'):
331         info['homepage'] = st['Homepage']
332     info['version'] = st['Version']
333
334     # origin
335     info['distribution'] = origin
336     info['release'] = codename
337     info['component'] = component
338
339     # pool url
340     info['poolurl'] = '/'.join([os.path.dirname(st['Filename'])])
341
342     # pkg description
343     descr = st['Description'].replace('%', '%%').split('\n')
344     info['description'] = descr[0].strip()
345     info['long_description'] = u'\n'.join(descr[1:])
346
347     db[pkg][distkey] = info
348
349     # charge the basic property set
350     db[pkg]['main']['description'] = info['description']
351     db[pkg]['main']['long_description'] = info['long_description']
352     if st.has_key('Source'):
353         db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
354     else:
355         db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
356     if st.has_key('Homepage'):
357         db[pkg]['main']['homepage'] = st['Homepage']
358     if st.has_key('Recommends'):
359         db[pkg]['main']['recommends'] = st['Recommends']
360
361     return db
362
363
def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    known = cfg.options('release codenames')
    # if we know something, tell; otherwise echo the codename back
    if codename not in known:
        return codename
    return cfg.get('release codenames', codename)
374
375
def create_dir(path):
    """Create `path` including all missing parent directories.

    Does nothing if `path` already exists.  Uses os.makedirs instead of
    the previous hand-rolled per-component loop, which also fixes the
    absolute-path case (the old loop tried os.mkdir('') for the empty
    first component of an absolute path).
    """
    if not os.path.exists(path):
        os.makedirs(path)
387
388
def dde_get(url, fail=False):
    """Query the DDE web service and return the decoded 'r' payload.

    Returns False if no information could be obtained.  On a URL error
    the request is retried exactly once after 30 seconds (unless `fail`
    is already set).
    """
    # enforce delay to be friendly to DDE
    time.sleep(3)
    try:
        # NOTE: json.read() is the API of the legacy python-json module,
        # not the stdlib json module -- do not swap in json.loads blindly
        data = json.read(urllib2.urlopen(url + "?t=json").read())['r']
        print("SUCCESS: %s" % url)
        return data
    except urllib2.HTTPError as e:
        print("NOINFO: %s %s" % (url, type(e)))
        return False
    except urllib2.URLError as e:
        print("URLERROR: %s %s" % (url, type(e)))
        if fail:
            print("Permanent failure")
            return False
        print("Try again after 30 seconds...")
        time.sleep(30)
        # retry once with fail=True so we give up on the second error
        return dde_get(url, fail=True)
    except StopIteration:
        print("NOINFO: %s" % url)
        return False
410
411
def import_dde(cfg, db):
    """Enrich `db` with information from the DDE web service.

    For every known package this queries DDE for the freshest package
    record, popcon statistics for both Debian and Ubuntu, and the
    per-release availability of the package (architectures collapsed
    per release, keeping only the newest version).

    :Parameters:
      cfg: config providing the [dde] pkgquery_url option
      db: package database dict, updated in place
    :Return:
      the updated db
    """
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use origin field itself, since it is none for few packages
            # i.e. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-karmic/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unkown origin '%s' for package '%s'." \
                        % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data for multiple over archs
            else:
                # NOTE(review): apt.VersionCompare is the old python-apt
                # API -- confirm it matches the deployed python-apt version
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db
477
478
def convert_longdescr(ld):
    """Convert a Debian-style long description into flowed ReST text.

    '% ' sequences are escaped to '%% ' (the text is later %-formatted),
    ' .' paragraph separators become blank lines, and embedded list
    items ('-' or '*' bullets) are forced onto their own lines.
    """
    marker = '#NEWLINEMARKER#'

    # escape percent signs that are followed by a space
    text = ld.replace('% ', '%% ')

    converted = []
    for line in text.split('\n'):
        if line == ' .':
            # paragraph separator
            converted.append(' ' + marker)
        elif line.startswith('  ') and len(line) >= 3 and line[2] in '-*':
            # embedded list item -- force a line break before it
            converted.append(' %s %s' % (marker, line[2:]))
        else:
            converted.append(line)

    # drop the single leading space of each line and flatten
    flat = u' '.join([line[1:] for line in converted])
    flat = flat.replace(marker + ' ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    return flat.replace(marker, '')
494
495
def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
    """Render the ReST page for one package via the given jinja template.

    :Return:
      the page text, or None if the package lacks even a short
      description (in which case no page should be written)
    """
    entry = db[pkg]
    # without at least the very basic stuff there is nothing to show
    if 'description' not in entry['main']:
        return None

    heading = '**%s** -- %s' % (pkg, entry['main']['description'])
    bar = '*' * (len(heading) + 2)
    title = '%s\n %s\n%s' % (bar, heading, bar)

    page = template.render(
        pkg=pkg,
        title=title,
        long_description=convert_longdescr(entry['main']['long_description']),
        cfg=cfg,
        db=entry,
        fulldb=db)

    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page
520
521
def store_db(db, filename):
    """Dump the package db to `filename` as pretty-printed Python literals."""
    with codecs.open(filename, 'w', 'utf-8') as out:
        out.write(PrettyPrinter(indent=2).pformat(db))
527
528
def read_db(filename):
    """Load a package db previously written by store_db().

    The file contains a pretty-printed Python literal (nested dicts,
    lists, tuples, strings), so it is parsed with ast.literal_eval
    instead of a full eval() -- same result for store_db() output, but
    no arbitrary code execution.  Also closes the file handle, which
    the previous version leaked.
    """
    import ast
    with codecs.open(filename, 'r', 'utf-8') as f:
        return ast.literal_eval(f.read())
533
def write_sourceslist(jinja_env, cfg, outdir):
    """Write sources.list snippets for every release/mirror pair plus the
    ReST page that links to them."""
    create_dir(outdir)
    create_dir(os.path.join(outdir, '_static'))

    repos = {}
    for release in cfg.options('release codenames'):
        transrel = trans_codename(release, cfg)
        entries = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            entries.append((mirror, listname))
            # one apt line shared by the deb and deb-src entries
            aptcfg = '%s %s main contrib non-free\n' \
                     % (cfg.get('mirrors', mirror), release)
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            lf.write('deb %s' % aptcfg)
            lf.write('deb-src %s' % aptcfg)
            lf.close()
        repos[transrel] = entries

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
    sl.close()
556
557
def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
    """Write the package TOC plus one ReST page per package into outdir."""
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    toc.write(toc_template.render(pkgs=db.keys()))
    toc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for pkg in db.keys():
        page = generate_pkgpage(pkg, cfg, db, pkg_template, addenum_dir)
        # packages without a description yield no page -- skip them
        if page is None:
            continue
        with codecs.open(os.path.join(outdir, 'pkgs', pkg + '.rst'),
                         'w', 'utf-8') as pf:
            pf.write(page)
579
def prepOptParser(op):
    """Populate the given OptionParser with all command line options."""
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option(
        "--db", action="store", type="string", dest="db", default=None,
        help="Database file to read. Default: None")

    op.add_option(
        "--cfg", action="store", type="string", dest="cfg", default=None,
        help="Repository config file.")

    op.add_option(
        "-o", "--outdir", action="store", type="string", dest="outdir",
        default=None,
        help="Target directory for ReST output. Default: None")

    op.add_option(
        "-r", "--release-url", action="append", dest="release_urls",
        help="None")

    op.add_option(
        "--pkgaddenum", action="store", dest="addenum_dir", type="string",
        default=None, help="None")
605
606
def main():
    """Command line entry point.

    The single positional argument selects the command: 'updatedb'
    rebuilds the package database; anything else renders the ReST
    pages from an existing database.
    """
    parser = OptionParser(version="%prog 0.0.2")
    prepOptParser(parser)

    options, args = parser.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    # a config file and a db file are required for every command
    if options.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if options.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(options.cfg)

    if cmd == 'updatedb':
        # rebuild the db from scratch
        db = {}
        # seed with packages named in the configured task files
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(
                db, cfg.get('packages', 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for pkgname in cfg.get('packages', 'select names').split():
                if pkgname not in db:
                    db[pkgname] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for taskurl in cfg.get('packages', 'prospective').split():
                db = import_blendstask(db, taskurl)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for relurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, relurl)

        # collect package information from DDE, store the new DB, done
        db = import_dde(cfg, db)
        store_db(db, options.db)
        return

    # every other command renders pages from an existing db
    db = read_db(options.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, options.outdir, options.addenum_dir)

    write_sourceslist(jinja_env, cfg, options.outdir)
670
# script entry point
if __name__ == "__main__":
    main()