#!/usr/bin/env python
"""Tell me who you are!
"""

import pysvn
import json
from debian_bundle import deb822

# Let's first silence some guarding (but annoying) warnings
import warnings
warnings.simplefilter('ignore', FutureWarning)
warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)

import apt
from ConfigParser import SafeConfigParser
from optparse import OptionParser, Option, OptionGroup, OptionConflictError
import sys
import os
import copy
import shutil
import urllib2
import urllib
import codecs
import subprocess
import time
# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter


class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Return a file handle for `url`, served from the cache if possible.

        If the file is not yet cached (or `update` is True), it is
        downloaded first. Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handle
        """
        # check whether the file is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-(len(cext) + 1)]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])
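        # For illustration: a hypothetical URL such as
        # 'http://neuro.debian.net/debian/dists/lenny/Release' would be
        # cached as 'neuro.debian.net_debian_dists_lenny_Release'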

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if an update is needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download
                tmpfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError("Don't know how to decompress %s files"
                                     % cext)

                if decompressor is not None:
                    # any non-zero exit code signals trouble
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        tmpfile]) != 0:
                        raise RuntimeError(
                            "Something went wrong while decompressing '%s'"
                            % tmpfile)

                # move decompressed file into cache (the decompressor strips
                # the compression extension; if nothing was decompressed,
                # move the temporary file as is)
                if cext is not None:
                    tmpfile = os.path.splitext(tmpfile)[0]
                shutil.move(tmpfile, cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh

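# A minimal usage sketch (hypothetical URL, for illustration only):
#
#   cache = AptListsCache()
#   fh = cache.get('http://neuro.debian.net/debian/dists/lenny/Release')
#   release = deb822.Release(fh)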

def add_pkgfromtaskfile(db, urls):
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if 'Depends' in stanza:
                pkg = stanza['Depends']
            elif 'Suggests' in stanza:
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line (split() on a string
            # without commas simply yields the whole string)
            pkgs += [p.strip() for p in pkg.split(',')]

    for p in pkgs:
        if p not in db:
            db[p] = get_emptydbentry()

    return db

def get_emptydbentry():
    return {'main': {}}
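
# The db dict maps a package name onto per-origin sections, roughly
# (sketch only; field names as used throughout this module, values
# hypothetical):
#
#   db['somepkg'] = {
#       'main': {'description': ..., 'homepage': ..., ...},
#       'blends': {'tasks': [...], ...},
#       ('Debian GNU/Linux 5.0', 'neurodebian-lenny'): {...},
#   }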

def import_blendstask(cfg, db, url):
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if 'Task' in st:
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if 'Depends' in st:
            pkg = st['Depends']
        elif 'Suggests' in st:
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if p not in db:
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if 'License' in st:
                info['license'] = st['License']
            if 'Responsible' in st:
                info['responsible'] = st['Responsible']

            # pkg description
            if 'Pkg-Description' in st:
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # populate the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if 'WNPP' in st:
                db[p]['main']['debian_itp'] = st['WNPP']
            if 'Pkg-URL' in st:
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if 'Homepage' in st:
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if 'Published-Title' in st:
                pub = {'title': st['Published-Title']}
                if 'Published-Authors' in st:
                    pub['authors'] = st['Published-Authors']
                if 'Published-Year' in st:
                    pub['year'] = st['Published-Year']
                if 'Published-In' in st:
                    pub['in'] = st['Published-In']
                if 'Published-URL' in st:
                    pub['url'] = st['Published-URL']
                if 'Published-DOI' in st:
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if 'url' not in pub:
                        pub['url'] = st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if 'Registration' in st:
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if 'Remark' in st:
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if 'blends' not in db[p]:
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

            # handle pkg name aliases
            if p in cfg.options('blend package aliases'):
                src_entry = db[p].copy()
                # remove original entry
                del db[p]
                # copy the entry into all aliases
                for alias in cfg.get('blend package aliases', p).split():
                    print "Aliasing %s to %s" % (p, alias)
                    db[alias] = copy.deepcopy(src_entry)

    return db


def get_releaseinfo(rurl):
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([label, codename])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}
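# For example (hypothetical values): a Release file with Label 'NeuroDebian'
# and Codename 'lenny' yields the labelcode 'NeuroDebian_lenny'.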

def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])

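# e.g. (hypothetical baseurl, for illustration):
#   build_pkgsurl('http://neuro.debian.net/debian/dists/lenny', 'main', 'i386')
#   -> 'http://neuro.debian.net/debian/dists/lenny/main/binary-i386/Packages.bz2'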

def import_release(cfg, db, rurl):
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db

def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Package section
    """
    pkg = st['Package']

    # only care for known packages
    if pkg not in db:
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if distkey in db[pkg]:
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if st['Architecture'] not in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if 'Homepage' in st:
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # populate the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if 'Source' in st:
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if 'Homepage' in st:
        db[pkg]['main']['homepage'] = st['Homepage']
    if 'Recommends' in st:
        db[pkg]['main']['recommends'] = st['Recommends']

    return db


def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename

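# The mapping lives in the repository config file, e.g. (hypothetical entry):
#
#   [release codenames]
#   lenny = Debian GNU/Linux 5.0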

def create_dir(path):
    if os.path.exists(path):
        return

    # create all intermediate directories, one level at a time
    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
            os.mkdir(p)


def dde_get(url, fail=False):
    # enforce delay to be friendly to DDE
    time.sleep(3)
    try:
        # note: this relies on the old 'python-json' module API
        # (json.read/json.ReadException), not the stdlib json module
        data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
        print "SUCCESS:", url
        return data
    except urllib2.HTTPError, e:
        print "NOINFO:", url, type(e)
        return False
    except urllib2.URLError, e:
        print "URLERROR:", url, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return dde_get(url, fail=True)
    except StopIteration:
        print "NOINFO:", url
        return False
    except json.ReadException, e:
        print "UDD-DOWN?:", url, type(e)
        return False

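# Usage sketch: dde_get(query_url + "/packages/all/python-nifti") returns the
# decoded 'r' payload of the JSON response on success, or False when DDE has
# no information (or is unreachable).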

def nitrc_get(spec, fail=False):
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    try:
        data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError, e:
        print "NITRC-NOINFO:", spec, type(e)
        return False
    except urllib2.URLError, e:
        print "NITRC-URLERROR:", spec, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return nitrc_get(spec, fail=True)
    return data


def parse_nitrc(data):
    if data is False:
        return None
    # simplify -- there is only one project in the data
    project = data['projects'][0]
    nitrc_filtered = {'downloads': 0,
                      'id': project['id']}
    # sum the download counts over all files of all releases of all packages
    for pkg in project['packages']:
        for release in pkg['releases']:
            for fl in release['files']:
                nitrc_filtered['downloads'] += fl['download_count']
    return nitrc_filtered

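# Assumed shape of the NITRC export (sketch, field names as read above):
#
#   {'projects': [{'id': ...,
#                  'packages': [{'releases': [{'files': [
#                      {'download_count': ...}, ...]}, ...]}, ...]}]}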

def import_nitrc(cfg, db):
    for p in db.keys():
        if not cfg.has_option("nitrc ids", p):
            continue
        nitrc_spec = cfg.get("nitrc ids", p)
        nitrc_data = nitrc_get(nitrc_spec)
        nitrc_excerpt = parse_nitrc(nitrc_data)
        if nitrc_excerpt is not None:
            db[p]['nitrc'] = nitrc_excerpt
    return db


def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use the origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if 'popcon' in q:
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and 'popcon' in q:
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if 'popcon' in q:
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-lucid/%s" % p)
                if q and 'popcon' in q:
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'." \
                        % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if distkey not in info:
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found a newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db


def convert_longdescr(ld):
    # escape percent signs to survive later string interpolation
    ld = ld.replace('% ', '%% ')
    ld = ld.split('\n')
    for i, l in enumerate(ld):
        if l == ' .':
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >= 3 and l[:2] == '  ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
    return ld

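# Sketch of the conversion: a Debian-style long description such as
#
#    A first paragraph
#    .
#    A second paragraph with a list:
#     - item
#
# comes out with blank-line paragraph breaks, roughly:
#
#   A first paragraph\n\nA second paragraph with a list:\n\n- item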

def generate_pkgpage(pkg, cfg, db, template, addenum_dir):
    # local binding for ease of use
    pkgdb = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if 'description' not in pkgdb['main']:
        return
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    underline = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (underline, title, underline)

    page = template.render(
            pkg=pkg,
            title=title,
            long_description=convert_longdescr(pkgdb['main']['long_description']),
            cfg=cfg,
            db=pkgdb,
            fulldb=db)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page


def store_db(db, filename):
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
    # the db is stored as a pretty-printed Python literal (see store_db),
    # so eval() restores it -- only feed this function trusted files
    db = eval(f.read())
    return db
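
# Round-trip sketch: store_db(db, 'db') writes the dict as a Python literal,
# and read_db('db') evaluates it back into an equivalent dict.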

def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, '_static'))

    repos = {}
    for release in cfg.options('release codenames'):
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                        release)
            lf.write('deb %s' % aptcfg)
            lf.write('deb-src %s' % aptcfg)
            lf.close()

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
    sl.close()

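# Each generated sources.list carries one deb and one deb-src line, e.g.
# (hypothetical mirror URL and release):
#
#   deb http://neuro.debian.net/debian lenny main contrib non-free
#   deb-src http://neuro.debian.net/debian lenny main contrib non-free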

def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    toc.write(toc_template.render(pkgs=db.keys()))
    toc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir)
        # when no page is available skip this package
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="Release file URL; may be given multiple times.")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None,
                  help="Directory with per-package page addenda (<pkg>.rst).")


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    # load existing db, unless renew is requested
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if p not in db:
                    db[p] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(cfg, db, url)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # get info from NITRC
        db = import_nitrc(cfg, db)
        # store the new DB
        store_db(db, opts.db)
        # and be done
        return

    # load the db from file
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)

if __name__ == "__main__":
    main()