#!/usr/bin/env python
"""Tell me who you are!
"""

import pysvn
import json
from debian_bundle import deb822

# First, silence some harmless but annoying warnings
import warnings
warnings.simplefilter('ignore', FutureWarning)
warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)

import apt
from ConfigParser import SafeConfigParser
from optparse import OptionParser
import sys
import os
import shutil
import urllib2
import urllib
import codecs
import subprocess
import time
# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter


class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Look the file up in the cache and return the cached copy,
        downloading it first if necessary.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # check whether the URL points to a compressed file
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if an update is needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download (avoid shadowing the stdlib 'tempfile' module)
                temp_file, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError, \
                          "Don't know how to decompress %s files" \
                          % cext

                if decompressor is not None:
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        temp_file]) != 0:
                        raise RuntimeError, \
                              "Something went wrong while decompressing '%s'" \
                              % temp_file

                # move decompressed file into cache
                shutil.move(os.path.splitext(temp_file)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh

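# A minimal usage sketch (illustrative only; the URL is hypothetical):
#
#   cache = AptListsCache()
#   fh = cache.get('http://example.org/dists/squeeze/Release')
#   print fh.readline()
#   fh.close()
#
# Compressed files are transparently decompressed, so a 'Packages.bz2' URL
# yields a handle on the uncompressed content.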

def add_pkgfromtaskfile(db, urls):
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if stanza.has_key('Depends'):
                pkg = stanza['Depends']
            elif stanza.has_key('Suggests'):
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            pkgs += [p.strip() for p in pkg.split(',')]

    for p in pkgs:
        if not db.has_key(p):
            db[p] = get_emptydbentry()

    return db

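# A (hypothetical) task file stanza that this function consumes could be:
#
#   Task: Neuroscience
#   Depends: python-nifti, python-nibabel
#
# Both dependencies would become separate keys in the db.
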
def get_emptydbentry():
    return {'main': {}}

def import_blendstask(db, url):
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out the blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
            pkg = st['Depends']
        elif st.has_key('Suggests'):
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # pkg description
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # populate the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # publications
            if st.has_key('Published-Title'):
                pub = {'title': st['Published-Title']}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = st['Published-DOI']

                db[p]['main']['publication'] = pub

            # registration
            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            # remarks
            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

    return db

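# A (hypothetical) blends task stanza with publication metadata could be:
#
#   Depends: python-mvpa
#   Published-Title: Some paper title
#   Published-DOI: 10.1000/xyz123
#
# When only a DOI is given, it doubles as the publication URL.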

def get_releaseinfo(rurl):
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([label, codename])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}


def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])

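# For example (hypothetical arguments):
#
#   build_pkgsurl('http://example.org/debian/dists/squeeze', 'main', 'i386')
#
# yields 'http://example.org/debian/dists/squeeze/main/binary-i386/Packages.bz2'.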

def import_release(cfg, db, rurl):
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile the Packages file URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db

def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Packages file stanza of a single binary package
    """
    pkg = st['Package']

    # only care for known packages
    if not db.has_key(pkg):
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if not st['Architecture'] in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description (escape '%' to survive later string substitutions)
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # populate the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']

    return db

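# Sketch of the resulting db layout (values are illustrative):
#
#   db['somepkg'] = {
#       'main': {'description': ..., 'sv': 'somesrc 1.0-1', ...},
#       ('NeuroDebian for Debian squeeze', 'neurodebian-squeeze'):
#           {'architecture': ['i386', 'amd64'], 'version': '1.0-1', ...},
#   }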

def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames are simply returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename

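# The corresponding (hypothetical) config section could look like:
#
#   [release codenames]
#   squeeze = Debian testing (squeeze)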

def create_dir(path):
    if os.path.exists(path):
        return

    # create all intermediate directories, one level at a time
    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
            os.mkdir(p)


def dde_get(url, fail=False):
    # enforce delay to be friendly to DDE
    time.sleep(3)
    try:
        # json.loads assumes the stdlib json module (Python 2.6+)
        data = json.loads(urllib2.urlopen(url + "?t=json").read())['r']
        print "SUCCESS:", url
        return data
    except urllib2.HTTPError, e:
        print "NOINFO:", url, type(e)
        return False
    except urllib2.URLError, e:
        print "URLERROR:", url, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return dde_get(url, fail=True)
    except StopIteration:
        print "NOINFO:", url
        return False

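# dde_get() appends '?t=json' to the query URL (built from the [dde]
# pkgquery_url config option, e.g. query_url + '/packages/all/somepkg' for a
# hypothetical package) and returns the decoded 'r' member of the JSON
# reply, or False if DDE has no information.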

def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu;
            # cannot use the origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-karmic/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'."
                      % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db


def convert_longdescr(ld):
    # escape '%' to survive later string substitutions
    ld = ld.replace('% ', '%% ')
    ld = ld.split('\n')
    for i, l in enumerate(ld):
        if l == ' .':
            # paragraph separator in Debian long descriptions
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >= 3 and l[:2] == '  ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
    return ld
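
# For example, the Debian-style long description (note the leading spaces
# and the ' .' paragraph separator)
#
#   ' line one\n .\n line two'
#
# is converted to 'line one \n\nline two' -- paragraph breaks become blank
# lines suitable for reST.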


def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
    # local binding for ease of use
    pkgdb = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if not pkgdb['main'].has_key('description'):
        return
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    underline = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (underline, title, underline)

    page = template.render(
            pkg=pkg,
            title=title,
            long_description=convert_longdescr(pkgdb['main']['long_description']),
            cfg=cfg,
            db=pkgdb,
            fulldb=db)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page


def store_db(db, filename):
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
    db = eval(f.read())
    return db

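# Design note: the db is persisted as a pretty-printed Python literal and
# restored via eval(). That is convenient and human-readable for a local,
# trusted file, but would be unsafe for untrusted input.
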
def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, '_static'))

    repos = {}
    for release in cfg.options('release codenames'):
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                        release)
            lf.write('deb %s' % aptcfg)
            lf.write('deb-src %s' % aptcfg)
            lf.close()

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
    sl.close()
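
# Each generated sources.list file contains a pair of lines like
# (mirror URL is hypothetical):
#
#   deb http://example.org/debian squeeze main contrib non-free
#   deb-src http://example.org/debian squeeze main contrib non-free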


def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    toc.write(toc_template.render(pkgs=db.keys()))
    toc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir)
        # when no page is available skip this package
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="Repository release file URL (can be given multiple times)")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None,
                  help="Directory with per-package page addenda (<pkg>.rst files)")


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    # rebuild the db from scratch, if requested
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if not db.has_key(p):
                    db[p] = get_emptydbentry()

        # get info from blends task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(db, url)

        # parse the NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # store the new db
        store_db(db, opts.db)
        # and be done
        return

    # otherwise load the db from file and render the website sources
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)

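# Typical invocations (file names are examples; any command word other than
# 'updatedb' triggers page generation):
#
#   dde.py --cfg website.cfg --db pkgs.db updatedb
#   dde.py --cfg website.cfg --db pkgs.db -o build/src --pkgaddenum addenda mkpages
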
if __name__ == "__main__":
    main()