#!/usr/bin/env python
"""Tell me who you are!
"""

import pysvn
import json
from debian_bundle import deb822
import numpy as np
import jinja2
# First, silence some guarding (but annoying) warnings
import warnings
warnings.simplefilter('ignore', FutureWarning)
warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)

import apt
from ConfigParser import SafeConfigParser
from optparse import OptionParser, Option, OptionGroup, OptionConflictError
import sys
import os
import copy
import shutil
import urllib2
import urllib
import codecs
import subprocess
import time
import re
# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter

class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Look the file up in the cache and return the cached copy, or
        download it first if it is not there yet (or `update` is requested).

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handle
        """
        # look whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if an update is needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            if url.startswith('http://'):
                # download
                tempfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError, \
                          "Don't know how to decompress %s files" \
                          % cext

                if decompressor is not None:
                    # any non-zero exit status means trouble
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                       tempfile]) != 0:
                        raise RuntimeError, \
                              "Something went wrong while decompressing '%s'" \
                              % tempfile

                # move decompressed file into cache
                shutil.move(os.path.splitext(tempfile)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh
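
# A minimal usage sketch of the cache (hypothetical URL; the first call
# downloads, subsequent calls are served from build/cache/):
#
#   >>> cache = AptListsCache()
#   >>> fh = cache.get('http://example.org/lists/some/Packages.bz2')
#
# The cached copy is the decompressed file named after the URL path,
# here 'example.org_lists_some_Packages'.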


def add_pkgfromtaskfile(db, urls):
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if stanza.has_key('Depends'):
                pkg = stanza['Depends']
            elif stanza.has_key('Recommends'):
                pkg = stanza['Recommends']
            elif stanza.has_key('Suggests'):
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            if pkg.count(','):
                pkgs += [p.strip() for p in pkg.split(',')]
            else:
                pkgs.append(pkg.strip())

    for p in pkgs:
        if not db.has_key(p):
            db[p] = get_emptydbentry()

    return db


def get_emptydbentry():
    return {'main': {}}


def import_blendstask(cfg, db, url):
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out the blend's task page URL, since it differs from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
            pkg = st['Depends']
        elif st.has_key('Recommends'):
            pkg = st['Recommends']
        elif st.has_key('Suggests'):
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # pkg description
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # charge the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if st.has_key('Published-Title'):
                title = st['Published-Title']
                if title[-1] == '.':
                    # strip trailing dot -- it is added later
                    pub = {'title': title[:-1]}
                else:
                    pub = {'title': title}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

            # handle pkg name aliases
            if p in cfg.options('blend package aliases'):
                src_entry = db[p].copy()
                # remove original entry
                del db[p]
                # copy the entry into all aliases
                for alias in cfg.get('blend package aliases', p).split():
                    print "Aliasing %s to %s" % (p, alias)
                    db[alias] = copy.deepcopy(src_entry)

    return db
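
# Example config snippet consumed by the aliasing code above (hypothetical
# package and alias names; aliases are whitespace-separated):
#
#   [blend package aliases]
#   some-blend-pkg = real-pkg-name another-alias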


def get_releaseinfo(rurl):
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([label, codename])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}


def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])
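
# e.g. (hypothetical values):
#   build_pkgsurl('http://example.org/debian/dists/sid', 'main', 'i386')
#   -> 'http://example.org/debian/dists/sid/main/binary-i386/Packages.bz2'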


def import_release(cfg, db, rurl):
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db


def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Package section
    """
    pkg = st['Package']

    # only care for known packages
    if not db.has_key(pkg):
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if not st['Architecture'] in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # charge the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']

    return db


def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename
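
# e.g. with a config entry like (hypothetical):
#
#   [release codenames]
#   squeeze = Debian GNU/Linux 6.0 (squeeze)
#
# trans_codename('squeeze', cfg) returns the long description; unknown
# codenames pass through unchanged.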


def create_dir(path):
    """Create `path` including all missing parents (much like os.makedirs,
    but tolerant of already existing prefixes)."""
    if os.path.exists(path):
        return

    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
            os.mkdir(p)


def dde_get(url, fail=False):
    # enforce delay to be friendly to DDE
    time.sleep(3)
    try:
        data = json.read(urllib2.urlopen(url + "?t=json").read())['r']
        print "SUCCESS:", url
        return data
    except urllib2.HTTPError, e:
        print "NOINFO:", url, type(e)
        return False
    except urllib2.URLError, e:
        print "URLERROR:", url, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return dde_get(url, fail=True)
    except StopIteration:
        print "NOINFO:", url
        return False
    except json.ReadException, e:
        print "UDD-DOWN?:", url, type(e)
        return False
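
# Usage sketch (hypothetical query URL; the real base URL comes from the
# 'pkgquery_url' option in the [dde] section of the config file):
#
#   >>> q = dde_get('http://example.org/dde/q/udd/packages/all/somepkg')
#   >>> if q: print q.get('popcon')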


def nitrc_get(spec, fail=False):
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    try:
        # switch to this once python 2.6 (with the stdlib json module) is in use
        #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError, e:
        print "NITRC-NOINFO:", spec, type(e)
        return False
    except urllib2.URLError, e:
        print "NITRC-URLERROR:", spec, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return nitrc_get(spec, fail=True)
    return data


def parse_nitrc(data):
    if data is False:
        return None
    # simplify -- there is only one project in the data
    project = data['projects'][0]
    nitrc_filtered = {'downloads': 0,
                      'id': project['id']}
    for pkg in project['packages']:
        for release in pkg['releases']:
            # do not shadow the 'file' builtin
            for f in release['files']:
                nitrc_filtered['downloads'] += f['download_count']
    return nitrc_filtered
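
# The NITRC JSON is expected to look roughly like (hypothetical excerpt):
#
#   {"projects": [{"id": 42,
#                  "packages": [{"releases": [{"files":
#                      [{"download_count": 7}]}]}]}]}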


def import_nitrc(cfg, db):
    for p in db.keys():
        if not cfg.has_option("nitrc ids", p):
            continue
        nitrc_spec = cfg.get("nitrc ids", p)
        nitrc_data = nitrc_get(nitrc_spec)
        nitrc_excerpt = parse_nitrc(nitrc_data)
        if nitrc_excerpt is not None:
            db[p]['nitrc'] = nitrc_excerpt
    return db


def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use the origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-oneiric/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'." \
                        % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db
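
# Release-specific records end up keyed by tuples like (hypothetical values):
#
#   ('Debian GNU/Linux 6.0 (squeeze)', 'debian-squeeze')
#
# i.e. (translated release name, "<distribution>-<codename>").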


def assure_unicode(s):
    """Assure that argument is unicode

    Necessary for strings that do not carry the Pythonish 'u' prefix to
    signal unicode, but are in fact UTF-8 encoded.
    """
    if type(s) is unicode:
        return s
    elif type(s) is str:
        # attempt regular unicode call and if it fails -- just decode it
        # from utf8
        try:
            return unicode(s)
        except UnicodeDecodeError, e:
            return s.decode('utf8')
    else:
        return assure_unicode(str(s))
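
# Examples (Python 2 semantics; the second input is UTF-8 encoded bytes):
#
#   >>> assure_unicode('abc')
#   u'abc'
#   >>> assure_unicode('\xc3\xa9')
#   u'\xe9'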


def convert_longdescr(ld):
    """Convert a plain-text long description into ReST.

    yoh: I think all this long description conversion will keep giving
    us problems, since there are no strict formatting rules per se,
    especially in blends files
    """
    descr = u''
    ld = ld.replace('% ', '%% ')
    ld = ld.replace(r'\t', '    ') # just in case, assuming a tab is 4 blanks
    ld = ld.split('\n')
    re_leadblanks = re.compile("^ *")
    # NB: '-' must come first in the class to be taken literally
    re_itemized = re.compile("^[-o*+] +")
    re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$")
    re_description_gr = re.compile("^( *[^-]+ - )(.*?)$")

    def unwrap_lines(lines):
        out = []
        indent_levels = [-1]
        for l in lines:
            match = re_itemized_gr.search(l).groups()
            if ((len(match[0]) in indent_levels and match[1] is None)
                or (len(match[0]) > max(indent_levels)+4)) \
                and match[2].strip() != '.':
                # append to previous
                if not out[-1].endswith(" "):
                    out[-1] += " "
                out[-1] += match[2]
            else:
                out.append(l)

            indent_levels = [len(match[0])]
            if match[1] is not None:
                indent_levels += [len(match[0]) + len(match[1])]
            if match[2].strip() == '.':
                # reset though if '.'
                indent_levels = [-1]
        return out

    def dedent_withlevel(lines):
        """Dedent `lines` (given as a list); return the dedented lines and
        by how much they were dedented.
        """
        nleading = min([re_leadblanks.search(l).span()[1]
                        for l in lines])
        return [l[nleading:] for l in lines], nleading

    def block_lines(ld, level=0):
        # we got a list of lines
        # dedent all of them first
        ld, level = dedent_withlevel(ld)

        # let's collect them in blocks/paragraphs
        # 1. into paragraphs split by '.'
        blocks, block = [], None

        # next block can begin if
        #  1. a '.' line
        #  2. it was an itemized list and all items begin with
        #     the same symbol or get further indented accordingly,
        #     so let's first check if it is an itemized list
        itemized_match = re_itemized.search(ld[0])
        if itemized_match:
            allow_indents = " "*itemized_match.span()[1]
        else:
            allow_indents = None
        for l in ld:
            if block is None or l.strip() == '.' \
                   or (len(l) and ( len(block) and (
                (l.startswith(' ') and not block[-1].startswith(' '))
                or
                (not l.startswith(' ') and block[-1].startswith(' '))))):
                block = []
                blocks.append(block)
            if l.strip() != '.':
                block.append(l)
        if len(blocks) == 1:
            return blocks[0]
        else:
            return [block_lines(b, level+1) for b in blocks if len(b)]

    def blocks_to_rst(bls, level=0):
        # check if this block is an itemized beast
        #itemized_match = re_itemized_gr.search(bls[0][0])
        #if itemized_match:
        #    res += ' 'allow_indents = " "*itemized_match.span()[1]
        out = ''
        for b in bls:
            if isinstance(b, list):
                if len(b) == 1:
                    out += " "*level + b[0] + '\n\n'
                else:
                    out += blocks_to_rst(b, level+1)
            else:
                e = " "*level + b + '\n'
                if not re_itemized.search(b):
                    pass
                    #e += '\n'
                elif len(e) and e[0] == ' ':
                    # strip 1 leading blank
                    e = e[1:]
                out += e
        out += '\n'
        return out

    ld = unwrap_lines(ld)
    bls = block_lines(ld)
    return blocks_to_rst(bls)
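
# Rough input sketch (hypothetical description text): paragraphs are
# separated by '.' lines and items start with '-', 'o', '*' or '+', e.g.
#
#    Toolkit for doing X.
#    .
#     - supports A
#     - supports B
#
# Wrapped paragraph lines get joined again; itemized blocks come out as
# ReST list items.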


def underline_text(text, symbol):
    underline = symbol * len(text)
    return '%s\n%s\n' % (text, underline)
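
# e.g. underline_text('Title', '=') yields:
#
#   Title
#   =====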


def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
    # local binding for ease of use
    pkgdb = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if not pkgdb['main'].has_key('description'):
        return
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    title = underline_text(title, '*')

    ex_dir = None
    if 'sv' in pkgdb['main']:
        ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
        if not os.path.exists(ex_dir):
            ex_dir = None
    page = template.render(
            pkg=pkg,
            title=title,
            long_description=convert_longdescr(
                assure_unicode(pkgdb['main']['long_description'])),
            cfg=cfg,
            db=pkgdb,
            fulldb=db,
            extracts_dir=ex_dir,
            op=os.path)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page


def store_db(db, filename):
    # the db is stored as a pretty-printed python dict literal
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
    # the db file is trusted input -- it is what store_db() wrote
    db = eval(f.read())
    return db
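
# The db file therefore looks like (hypothetical excerpt):
#
#   { 'somepkg': { 'main': { 'description': u'does something',
#                            'homepage': 'http://example.org' } } }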


def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'lists'))

    repos = {}
    for release in cfg.options('release codenames'):
        if release == 'data':
            # no separate list for the data archive
            continue
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = '%s.%s' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, 'lists', listname), 'w')
            for rel in ('data', release):
                aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                            rel)
                lf.write('deb %s' % aptcfg)
                lf.write('#deb-src %s' % aptcfg)
            lf.close()

    id2codename = dict([(cfg.get('release backport ids', r), r)
                            for r in cfg.options('release codenames')])
    id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
                            for r in cfg.options('release codenames')])
    mirror2name = dict([(m, cfg.get('mirror names', m))
                            for m in cfg.options('mirrors')])
    mirror2url = dict([(m, cfg.get('mirrors', m))
                            for m in cfg.options('mirrors')])
    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(id2codename=id2codename,
                                     id2relname=id2relname,
                                     mirror2name=mirror2name,
                                     mirror2url=mirror2url))
    sl.close()
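
# Each generated list file pairs the data archive with one release, e.g.
# (hypothetical mirror URL):
#
#   deb http://example.org/debian data main contrib non-free
#   #deb-src http://example.org/debian data main contrib non-free
#   deb http://example.org/debian squeeze main contrib non-free
#   #deb-src http://example.org/debian squeeze main contrib non-free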


def sort_by_tasks(db):
    tasks = {}
    for pkg in db.keys():
        if not 'blends' in db[pkg]:
            # no blend info
            continue
        blendinfo = db[pkg]['blends']
        if not 'tasks' in blendinfo:
            # no task info in blend data
            continue
        taskinfo = blendinfo['tasks']
        for task in taskinfo:
            taskname = task[1]
            if not taskname in tasks:
                tasks[taskname] = []
            # append unconditionally -- do not drop the first package of a task
            tasks[taskname].append(pkg)
    return tasks


def sort_by_maintainer(db):
    maints = {}
    maint_ids = {}
    for pkg in db.keys():
        maint = None
        pkginfo = db[pkg]
        # start with the blends info
        if 'blends' in pkginfo and 'responsible' in pkginfo['blends']:
            maint = pkginfo['blends']['responsible']
        if not 'main' in db[pkg] and maint is None:
            # no info
            continue
        info = db[pkg]['main']
        if not 'maintainer' in info and maint is None:
            # no maintainer info
            continue
        if 'original_maintainer' in info and not info['original_maintainer'] is None:
            maint = info['original_maintainer']
        elif 'maintainer' in info and not info['maintainer'] is None:
            maint = info['maintainer']
        if maint is None:
            # no sane maintainer info
            continue
        # safeguard: <> confuses sphinx and we don't care about different emails
        maint = maint[:maint.find('<')].strip()
        # kick out non-ascii ones (should not be, but too tired to find the bug)
        try:
            codecs.ascii_decode(maint)
        except (UnicodeDecodeError, UnicodeEncodeError):
            continue
        if not maint.lower() in maints:
            maints[maint.lower()] = []
            maint_ids[maint.lower()] = [maint]
        else:
            maint_ids[maint.lower()].append(maint)
        maints[maint.lower()].append(pkg)
    # remove duplicates
    out = {}
    for m in maints:
        out[maint_ids[m][0]] = np.unique(maints[m])
    return out


def sort_by_release(db):
    rels = {}
    for pkg in db.keys():
        pkginfo = db[pkg]
        for sec in pkginfo:
            if not isinstance(sec, tuple):
                # only releases are of interest
                continue
            relname = sec[0]
            if not relname in rels:
                rels[relname] = []
            # append unconditionally -- do not drop the first package of a release
            rels[relname].append(pkg)
    # remove duplicates
    for r in rels:
        rels[r] = np.unique(rels[r])
    return rels


def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))
    create_dir(os.path.join(outdir, 'pkglists'))
    # template for individual package listings
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    # the high-level package list overview
    hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    hltoc.write('.. _pkglists:\n\n')
    hltoc.write(underline_text('Software packages', '='))
    defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
            (sort_by_release(db), 'By release', 'Packages for %s'),
            (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
    for def_ in defs:
        # TOC for each thingie
        pkgsdict, sectitle, title_tmpl = def_
        hltoc.write(underline_text(sectitle, '-'))
        ids = pkgsdict.keys()
        ids.sort()
        for id_ in ids:
            label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
            # filter out crap
            filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
            if not len(filtered_pkgs):
                continue
            plist = toc_template.render(
                        label=label,
                        title=underline_text(title_tmpl % id_, '='),
                        pkgs=filtered_pkgs,
                        db=db)
            toc = codecs.open(os.path.join(outdir,
                                           'pkglists',
                                           '%s.rst' % label),
                              'w', 'utf-8')
            toc.write(plist)
            toc.close()
            hltoc.write('* :ref:`%s`\n' % label)
        hltoc.write('\n\n')

    # now a complete list of all packages
    hltoc.write(underline_text('Complete list', '-'))
    toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
                      'w', 'utf-8')
    toc.write(toc_template.render(label='full_pkg_list',
                title=underline_text('Complete package list', '='),
                pkgs=db.keys(), db=db))
    toc.close()
    hltoc.write('* :ref:`full_pkg_list`\n')
    hltoc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
        # when no page is available skip this package
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="Release file URL (can be given multiple times).")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None,
                  help="Directory with per-package addenda (<pkg>.rst files).")

    op.add_option("--extracts", action="store", dest="extracts_dir",
                  type="string", default=None,
                  help="Directory with extracted package information.")
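
# Typical invocations (hypothetical file names; 'updatedb' rebuilds the db,
# 'debug_ld' dumps long-description conversions, and any other single
# command renders the ReST output from an existing db):
#
#   dde.py --cfg neurodebian.cfg --db db.pprint updatedb
#   dde.py --cfg neurodebian.cfg --db db.pprint -o build/src mkpages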


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    if cmd == 'debug_ld':
        # load the db from file
        db = read_db(opts.db)

        for p in db.keys():
        #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']:
            if not 'long_description' in db[p]['main']:
                continue
            ld = db[p]['main']['long_description']

            print ">>>>>>>>> ", p
            #print ld
            print "----"
            print convert_longdescr(ld)
        raise SystemExit
    # rebuild the db from scratch if requested
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if not db.has_key(p):
                    db[p] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(cfg, db, url)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # get info from NITRC
        db = import_nitrc(cfg, db)
        # store the new DB
        store_db(db, opts.db)
        # and be done
        return

    # load the db from file
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)


if __name__ == "__main__":
    main()