#!/usr/bin/env python
"""Tell me who you are!
"""

import pysvn
import json
from debian_bundle import deb822

# Let's first silence some guarding (but annoying) warnings
import warnings
warnings.simplefilter('ignore', FutureWarning)
warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)

import apt
from ConfigParser import SafeConfigParser
from optparse import OptionParser, Option, OptionGroup, OptionConflictError
import sys
import os
import copy
import shutil
import urllib2
import urllib
import codecs
import subprocess
import time
# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter


class AptListsCache(object):
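    """Cache for downloaded APT lists and other remote files.

    Files are stored in a writable cache dir; any number of additional
    read-only cache dirs is searched as well.
    """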
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Looks in the cache if the file is there and takes the cached one.
        Otherwise it is downloaded first.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # check whether the URL points to a compressed file
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if an update is needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download
                tempfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError, \
                          "Don't know how to decompress %s files" \
                          % cext

                if decompressor is not None:
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                       tempfile]) != 0:
                        raise RuntimeError, \
                              "Something went wrong while decompressing '%s'" \
                              % tempfile

                # move decompressed file into cache
                shutil.move(os.path.splitext(tempfile)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh


def add_pkgfromtaskfile(db, urls):
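    """Seed the db with all packages named in the given task file URLs.

    Package names are collected from the Depends and Suggests fields of
    all stanzas; each package gets an empty db entry.
    """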
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if stanza.has_key('Depends'):
                pkg = stanza['Depends']
            elif stanza.has_key('Suggests'):
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            if pkg.count(','):
                pkgs += [p.strip() for p in pkg.split(',')]
            else:
                pkgs.append(pkg.strip())

    for p in pkgs:
        if not db.has_key(p):
            db[p] = get_emptydbentry()

    return db

def get_emptydbentry():
    return {'main': {}}

def import_blendstask(cfg, db, url):
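    """Import package information from a Debian blend's task file.

    Records task membership, license, responsible maintainer,
    descriptions, publication, registration and remark fields, and
    expands package name aliases configured in the config file.
    """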
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
            pkg = st['Depends']
        elif st.has_key('Suggests'):
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # pkg description
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # populate the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if st.has_key('Published-Title'):
                pub = {'title': st['Published-Title']}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

            # handle pkg name aliases
            if p in cfg.options('blend package aliases'):
                src_entry = db[p].copy()
                # remove original entry
                del db[p]
                # copy the entry into all aliases
                for alias in cfg.get('blend package aliases', p).split():
                    print "Aliasing %s to %s" % (p, alias)
                    db[alias] = copy.deepcopy(src_entry)

    return db


def get_releaseinfo(rurl):
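    """Parse a Release file and return a dict of repository properties.

    The returned 'labelcode' combines label and codename to distinguish
    official from unofficial repositories.
    """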
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([label, codename])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}


def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])


def import_release(cfg, db, rurl):
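    """Import packages from all Packages files of the release at `rurl`.

    All component/architecture combinations announced in the Release
    file are parsed and known packages are stored via _store_pkg().
    """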
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db

def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Package section
    """
    pkg = st['Package']

    # only care for known packages
    if not db.has_key(pkg):
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if not st['Architecture'] in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # populate the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']

    return db


def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename


def create_dir(path):
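    """Create the directory `path`, including any missing parent dirs.

    Does nothing if the path already exists.
    """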
    if os.path.exists(path):
        return

    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
            os.mkdir(p)


def dde_get(url, fail=False):
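    """Query a DDE URL and return the decoded JSON payload.

    Returns False on failure; a URLError triggers a single retry after
    30 seconds unless `fail` is set.
    """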
    # enforce delay to be friendly to DDE
    time.sleep(3)
    try:
        data = json.read(urllib2.urlopen(url + "?t=json").read())['r']
        print "SUCCESS:", url
        return data
    except urllib2.HTTPError, e:
        print "NOINFO:", url, type(e)
        return False
    except urllib2.URLError, e:
        print "URLERROR:", url, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return dde_get(url, fail=True)
    except StopIteration:
        print "NOINFO:", url
        return False
    except json.ReadException, e:
        print "UDD-DOWN?:", url, type(e)
        return False


def nitrc_get(spec, fail=False):
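    """Retrieve project information for `spec` from NITRC.

    Returns False on failure; a URLError triggers a single retry after
    30 seconds unless `fail` is set.
    """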
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    try:
        # switch to json.loads() once Python 2.6 can be relied upon
        #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError, e:
        print "NITRC-NOINFO:", spec, type(e)
        return False
    except urllib2.URLError, e:
        print "NITRC-URLERROR:", spec, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return nitrc_get(spec, fail=True)
    return data


def parse_nitrc(data):
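    """Reduce NITRC project data to total download count and project id."""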
    if data is False:
        return None
    # simplify -- there is only one project in the data
    project = data['projects'][0]
    nitrc_filtered = {'downloads': 0,
                      'id': project['id']}
    for pkg in project['packages']:
        for release in pkg['releases']:
            for fl in release['files']:
                nitrc_filtered['downloads'] += fl['download_count']
    return nitrc_filtered


def import_nitrc(cfg, db):
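    """Add NITRC statistics for all packages with a configured NITRC id."""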
    for p in db.keys():
        if not cfg.has_option("nitrc ids", p):
            continue
        nitrc_spec = cfg.get("nitrc ids", p)
        nitrc_data = nitrc_get(nitrc_spec)
        nitrc_excerpt = parse_nitrc(nitrc_data)
        if nitrc_excerpt is not None:
            db[p]['nitrc'] = nitrc_excerpt
    return db


def import_dde(cfg, db):
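    """Augment the db with package information from DDE/UDD.

    Pulls the freshest record for each package (including Debian and
    Ubuntu popcon data), plus per-release information across all known
    distribution releases.
    """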
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use the origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-lucid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'." \
                        % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db


def convert_longdescr(ld):
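    """Convert a Debian-style long description into reST-ready text.

    Paragraph separators (' .') become blank lines; embedded '-'/'*'
    list items are made to start new paragraphs as well.
    """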
    ld = ld.replace('% ', '%% ')
    ld = ld.split('\n')
    for i, l in enumerate(ld):
        if l == ' .':
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >= 3 and l[:2] == '  ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
    return ld


def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
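    """Render the reST page for a single package.

    Returns None if the package lacks even a basic description. If an
    addendum file <pkg>.rst exists in `addenum_dir`, an include
    directive for it is appended to the page.
    """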
    # local binding for ease of use
    pkgdb = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if not pkgdb['main'].has_key('description'):
        return
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    underline = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (underline, title, underline)

    page = template.render(
            pkg=pkg,
            title=title,
            long_description=convert_longdescr(pkgdb['main']['long_description']),
            cfg=cfg,
            db=pkgdb,
            fulldb=db)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page


def store_db(db, filename):
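    """Dump the db as a pretty-printed Python literal expression."""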
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
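    """Load a db previously written by store_db().

    Note that this eval()s the file's content, so the db file has to
    come from a trusted source.
    """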
    f = codecs.open(filename, 'r', 'utf-8')
    db = eval(f.read())
    return db

def write_sourceslist(jinja_env, cfg, outdir):
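    """Write sources.list files for all release/mirror combinations and
    render an overview page listing them.
    """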
    create_dir(outdir)
    create_dir(os.path.join(outdir, '_static'))

    repos = {}
    for release in cfg.options('release codenames'):
        if release == 'data':
            # no separate list for the data archive
            continue
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            for rel in ('data', release):
                aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                            rel)
                lf.write('deb %s' % aptcfg)
                lf.write('#deb-src %s' % aptcfg)
            lf.close()

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
    sl.close()


def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
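    """Write the package TOC and one reST page per package."""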
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    toc.write(toc_template.render(pkgs=db.keys()))
    toc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir)
        # when no page is available skip this package
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="URL of a repository's Release file. Can be given multiple times.")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None,
                  help="Directory with per-package addenda (<pkg>.rst) to append to package pages. Default: None")


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    # build a fresh db if requested; otherwise the existing one is loaded below
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if not db.has_key(p):
                    db[p] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(cfg, db, url)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # get info from NITRC
        db = import_nitrc(cfg, db)
        # store the new DB
        store_db(db, opts.db)
        # and be done
        return

    # load the db from file
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)

if __name__ == "__main__":
    main()