]> git.donarmstrong.com Git - neurodebian.git/blob - neurodebian/dde.py
Merge branch 'master' of ssh://alioth.debian.org/git/pkg-exppsy/neurodebian
[neurodebian.git] / neurodebian / dde.py
1 #!/usr/bin/env python
2 """Tell me who you are!
3 """
4
5 import pysvn
6 import json
7 from debian_bundle import deb822
8
9 # Lets first assure no guarding (but annoying) warnings
10 import warnings
11 warnings.simplefilter('ignore', FutureWarning)
12 warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
13
14 import apt
15 from ConfigParser import SafeConfigParser
16 from optparse import OptionParser, Option, OptionGroup, OptionConflictError
17 import sys
18 import os
19 import shutil
20 import urllib2
21 import urllib
22 import codecs
23 import subprocess
24 import time
25 # templating
26 from jinja2 import Environment, PackageLoader
27
28 from pprint import PrettyPrinter
29
30
class AptListsCache(object):
    """Cache of downloaded APT list / task files.

    Files are stored under `cachedir`; additional read-only cache
    directories can be searched for already-existing copies.
    """
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Looks in the cache if the file is there and takes the cached one.
        Otherwise it is downloaded first.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # look whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimik what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache;
            # NOTE: intentionally no break -- the *last* matching
            # directory (i.e. a read-only cache) takes precedence
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)

        # nothing found? -- register a cache item and force a download
        if cfilename is None:
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if update needed -- download
        if update:
            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download into a temporary location
                tempfile, ignored = urllib.urlretrieve(url)

                # pick the decompressor matching the extension
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError(
                        "Don't know how to decompress %s files" % cext)

                if decompressor is not None:
                    # check for any non-zero exit status (the previous
                    # '== 1' test missed other failure codes)
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        tempfile]) != 0:
                        raise RuntimeError(
                            "Something went wrong while decompressing '%s'"
                            % tempfile)

                # move decompressed file into cache
                shutil.move(os.path.splitext(tempfile)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh
124
125
def add_pkgfromtaskfile(db, urls):
    """Seed `db` with empty entries for every package named in the
    given task files (taken from their Depends/Suggests fields)."""
    cache = AptListsCache()
    candidates = []

    for url in urls:
        task_fh = cache.get(url)

        # walk all stanzas of the task file
        for stanza in deb822.Packages.iter_paragraphs(task_fh):
            if 'Depends' in stanza:
                entry = stanza['Depends']
            elif 'Suggests' in stanza:
                entry = stanza['Suggests']
            else:
                continue

            # a single field may list several comma-separated packages
            candidates.extend([name.strip() for name in entry.split(',')])

    for name in candidates:
        if name not in db:
            db[name] = get_emptydbentry()

    return db
153
def get_emptydbentry():
    """Return a fresh, empty database record for one package."""
    return {'main': {}}
156
def import_blendstask(db, url):
    """Parse a Debian blends task file at `url` and merge its metadata
    (task membership, license, publications, etc.) into matching `db`
    entries.  Packages unknown to `db` are reported and skipped.
    """
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/' 
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        # a 'Task' stanza defines the task all following packages belong to
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)
        # NOTE(review): if a package stanza ever precedes the first
        # 'Task' stanza, `task` below is used before assignment --
        # confirm the task files guarantee this ordering.

        if st.has_key('Depends'):
            pkg = st['Depends']
        elif st.has_key('Suggests'):
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # pkg description
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # charge the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if st.has_key('Published-Title'):
                pub = {'title': st['Published-Title']}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
            else:
                # just add this tasks name and id
                db[p]['blends']['tasks'].append(task)

    return db
256
257
def get_releaseinfo(rurl):
    """Fetch and parse a repository Release file.

    Returns a dict holding the repository base URL, architectures,
    components, codename, label, labelcode and origin.
    """
    # the repository root is the Release file's parent directory
    baseurl = '/'.join(rurl.split('/')[:-1])
    # pull the Release file through the cache and parse it
    fh = AptListsCache().get(rurl)
    release = deb822.Release(fh)

    info = {
        'baseurl': baseurl,
        'archs': release['Architectures'].split(),
        'components': release['Components'].split(),
        'label': release['Label'],
        'origin': release['Origin'],
        'codename': release['Codename'],
        # label+codename distinguishes official from unofficial repos
        'labelcode': '_'.join([release['Label'], release['Codename']]),
    }

    fh.close()
    return info
284
285
def build_pkgsurl(baseurl, component, arch):
    """Return the URL of the bzip2-compressed Packages index for the
    given repository component and architecture."""
    return '%s/%s/binary-%s/Packages.bz2' % (baseurl, component, arch)
288
289
def import_release(cfg, db, rurl):
    """Parse every Packages index of the release at `rurl` and merge
    the contained package stanzas into `db`."""
    cache = AptListsCache()
    relinfo = get_releaseinfo(rurl)

    # one Packages file exists per (component, architecture) pair
    for comp in relinfo['components']:
        for arch in relinfo['archs']:
            pkgs_fh = cache.get(build_pkgsurl(relinfo['baseurl'], comp, arch))

            for stanza in deb822.Packages.iter_paragraphs(pkgs_fh):
                db = _store_pkg(cfg, db, stanza, relinfo['origin'],
                                relinfo['codename'], comp, relinfo['baseurl'])

            pkgs_fh.close()

    return db
312
def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """Merge one binary package stanza `st` into `db`.

    :Parameter:
      st: Package section

    Packages not already present in `db` are silently ignored.
    """
    pkg = st['Package']

    # only care for known packages
    if pkg not in db:
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)
    if distkey in db[pkg]:
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # record this stanza's architecture (once)
    arch = st['Architecture']
    if arch not in info['architecture']:
        info['architecture'].append(arch)
    info['maintainer'] = st['Maintainer']
    if 'Homepage' in st:
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # repository origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # directory in the pool holding the package file
    info['poolurl'] = os.path.dirname(st['Filename'])

    # short and long description; '%' escaped for later templating
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # mirror the basic properties into the 'main' record
    main = db[pkg]['main']
    main['description'] = info['description']
    main['long_description'] = info['long_description']
    if 'Source' in st:
        main['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        main['sv'] = "%s %s" % (st['Package'], st['Version'])
    if 'Homepage' in st:
        main['homepage'] = st['Homepage']
    if 'Recommends' in st:
        main['recommends'] = st['Recommends']

    return db
368
369
def trans_codename(codename, cfg):
    """Map a release codename to its human-readable description.

    Codenames without an entry in the 'release codenames' config
    section are returned unchanged.
    """
    if codename not in cfg.options('release codenames'):
        # unknown -- pass through as is
        return codename
    return cfg.get('release codenames', codename)
380
381
def create_dir(path):
    """Create directory `path`, including any missing parents.

    A no-op if the directory already exists.  Uses os.makedirs instead
    of the previous component-by-component loop, which attempted
    os.mkdir('') on the empty leading component of an absolute path.
    """
    if not os.path.exists(path):
        os.makedirs(path)
393
394
def dde_get(url, fail=False):
    """Query the DDE/UDD web service at `url` and return the decoded
    'r' payload, or False when no data could be retrieved.

    URL-level errors are retried once after 30 seconds; with fail=True
    a failure gives up immediately.
    """
    # enforce delay to be friendly to DDE
    time.sleep(3)
    try:
        # NOTE(review): json.read()/json.ReadException are the API of
        # the legacy 'python-json' package, not the stdlib json module
        # -- confirm which one is installed before changing this.
        data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
        print "SUCCESS:", url
        return data
    except urllib2.HTTPError, e:
        # server responded but has no info for this query
        print "NOINFO:", url, type(e)
        return False
    except urllib2.URLError, e:
        # network-level problem -- retry once unless already retrying
        print "URLERROR:", url, type(e)
        if fail:
            print "Permanant failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return dde_get(url, fail=True)
    except (StopIteration):
        print "NOINFO:", url
        return False
    except json.ReadException, e:
        # service reachable but returned unparsable content
        print "UDD-DOWN?:", url, type(e)
        return False
419
420
def nitrc_get(spec, fail=False):
    """Fetch project information for `spec` from the NITRC JSON export.

    Returns the decoded data, or False when NITRC has no information.
    A URL-level error is retried once after 30 seconds unless
    fail=True.
    """
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    try:
        # NOTE(review): json.read() belongs to the legacy 'python-json'
        # package, not the stdlib json module -- verify before changing.
        data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError, e:
        # server responded but has no info for this project
        print "NITRC-NOINFO:", spec, type(e)
        return False
    except urllib2.URLError, e:
        # network-level problem -- retry once unless already retrying
        print "NITRC-URLERROR:", spec, type(e)
        if fail:
            print "Permanant failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return nitrc_get(spec, fail=True)
    return data
438
439
def parse_nitrc(data):
    """Condense a NITRC project record into {'id', 'downloads'}.

    `data` is the structure returned by nitrc_get(); returns None when
    that lookup failed (data is False).
    """
    if data is False:
        return None
    # the query yields exactly one project
    project = data['projects'][0]
    total = 0
    for package in project['packages']:
        for release in package['releases']:
            for entry in release['files']:
                total += entry['download_count']
    return {'downloads': total, 'id': project['id']}
452
453
def import_nitrc(cfg, db):
    """Attach NITRC download statistics to every package that has an
    entry in the config's 'nitrc ids' section."""
    for pkg in db.keys():
        if not cfg.has_option("nitrc ids", pkg):
            continue
        spec = cfg.get("nitrc ids", pkg)
        excerpt = parse_nitrc(nitrc_get(spec))
        if excerpt is not None:
            db[pkg]['nitrc'] = excerpt
    return db
464
465
def import_dde(cfg, db):
    """Augment each `db` entry with DDE/UDD information: merged package
    metadata, Debian and Ubuntu popcon numbers, and per-release package
    details.
    """
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use origin field itself, since it is none for few packages
            # i.e. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-karmic/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unkown origin '%s' for package '%s'." \
                        % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data for multiple over archs
            else:
                # NOTE(review): apt.VersionCompare is the old python-apt
                # API (newer releases use apt_pkg.version_compare) --
                # confirm against the installed python-apt version.
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db
531
532
533 def convert_longdescr(ld):
534     ld = ld.replace('% ', '%% ')
535     ld = ld.split('\n')
536     for i, l in enumerate(ld):
537         if l == ' .':
538             ld[i] = ' #NEWLINEMARKER#'
539         # look for embedded lists
540         elif len(l) >=3 and l[:2] == '  ' and l[2] in '-*':
541             ld[i] = ' #NEWLINEMARKER# ' + l[2:]
542
543     ld = u' '.join([l[1:] for l in ld])
544     ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
545     # cleanup any leftover (e.g. trailing markers)
546     ld = ld.replace('#NEWLINEMARKER#', '')
547     return ld
548
549
def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
    """Render the reST page for one package, or None when the database
    lacks even a basic description for it."""
    pkgdb = db[pkg]
    # bail out without at least the very basic stuff
    if 'description' not in pkgdb['main']:
        return

    # build an overlined/underlined reST title
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    bar = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (bar, title, bar)

    page = template.render(
        pkg=pkg,
        title=title,
        long_description=convert_longdescr(pkgdb['main']['long_description']),
        cfg=cfg,
        db=pkgdb,
        fulldb=db)

    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page
574
575
def store_db(db, filename):
    """Serialize `db` to `filename` as pretty-printed Python literals,
    UTF-8 encoded."""
    out = codecs.open(filename, 'w', 'utf-8')
    out.write(PrettyPrinter(indent=2).pformat(db))
    out.close()
581
582
def read_db(filename):
    """Load a database previously written by store_db() and return the
    evaluated dictionary."""
    f = codecs.open(filename, 'r', 'utf-8')
    try:
        # NOTE: eval() executes arbitrary code -- only acceptable here
        # because the file is produced locally by store_db(); never
        # point this at untrusted input.
        db = eval(f.read())
    finally:
        # the previous version leaked this file handle
        f.close()
    return db
587
def write_sourceslist(jinja_env, cfg, outdir):
    """Generate one APT sources.list snippet per release/mirror pair
    plus a reST overview page linking to all of them."""
    create_dir(outdir)
    static_dir = os.path.join(outdir, '_static')
    create_dir(static_dir)

    repos = {}
    for release in cfg.options('release codenames'):
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            # one snippet carrying both the deb and deb-src lines
            aptcfg = '%s %s main contrib non-free\n' \
                     % (cfg.get('mirrors', mirror), release)
            lf = open(os.path.join(static_dir, listname), 'w')
            lf.write('deb %s' % aptcfg)
            lf.write('deb-src %s' % aptcfg)
            lf.close()

    overview = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(overview.render(repos=repos))
    sl.close()
610
611
def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
    """Write the package table of contents and one reST page per
    package below `outdir`."""
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # TOC listing every known package
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    toc.write(toc_template.render(pkgs=db.keys()))
    toc.close()

    # individual pages; packages without renderable content are skipped
    pkg_template = jinja_env.get_template('pkg.rst')
    for pkg in db.keys():
        page = generate_pkgpage(pkg, cfg, db, pkg_template, addenum_dir)
        if page is None:
            continue
        out = codecs.open(os.path.join(outdir, 'pkgs', pkg + '.rst'),
                          'w', 'utf-8')
        out.write(page)
        out.close()
632
633
def prepOptParser(op):
    """Register all command line options on the OptionParser `op`."""
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db", default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg", default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir", default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="None")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None, help="None")
659
660
def main():
    """Command line entry point: build the package database
    ('updatedb') or render pages from an existing one."""
    parser = OptionParser(version="%prog 0.0.2")
    prepOptParser(parser)

    (opts, args) = parser.parse_args()

    # exactly one positional command is expected
    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    # both config and database locations are mandatory
    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    if cmd == 'updatedb':
        # build the database from scratch
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(
                db, cfg.get('packages', 'select taskfiles').split())

        # additional package names straight from the config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if p not in db:
                    db[p] = get_emptydbentry()

        # prospective packages from blends task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(db, url)

        # NeuroDebian repository contents
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # external information sources: DDE, then NITRC
        db = import_dde(cfg, db)
        db = import_nitrc(cfg, db)

        # store the new DB and be done
        store_db(db, opts.db)
        return

    # every other command renders pages from an existing database
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # package pages + TOC, then the sources.list snippets
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)
    write_sourceslist(jinja_env, cfg, opts.outdir)

if __name__ == "__main__":
    main()