#!/usr/bin/env python
"""Tell me who you are!
"""

import pysvn
import json
import numpy as np

from ConfigParser import SafeConfigParser
from optparse import OptionParser, OptionGroup, OptionConflictError

# Let's first assure there are no guarding (but annoying) warnings
import warnings
warnings.simplefilter('ignore', FutureWarning)
warnings.filterwarnings('ignore',
                        'Module debian_bundle was already imported.*', UserWarning)

from debian import deb822
import apt                              # initializes the "_system" ;)
from apt_pkg import version_compare

import sys
import os
import copy
import shutil
import urllib2
import urllib
import codecs
import subprocess
import time
import re

# templating
from jinja2 import Environment, PackageLoader

from pprint import PrettyPrinter


class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None,
                 init_db=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # create cachedir
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Looks in the cache if the file is there and takes the cached one.
        Otherwise it is downloaded first.

        Knows how to deal with http:// and svn:// URLs.

        :Return:
          file handler
        """
        # look whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)

        # nothing found?
        if cfilename is None:
            # add cache item
            cfilename = os.path.join(self.cachedir, tfilename)
            update = True

        # if update needed -- download
        if update:
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            elif url.startswith('http://'):
                # download
                tempfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'gz':
                    decompressor = 'gzip'
                elif cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext is None:
                    decompressor = None
                else:
                    raise ValueError, \
                          "Don't know how to decompress %s files" \
                          % cext

                if decompressor is not None:
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                       tempfile]) != 0:
                        raise RuntimeError, \
                              "Something went wrong while decompressing '%s'" \
                              % tempfile

                # move decompressed file into cache
                shutil.move(os.path.splitext(tempfile)[0], cfilename)

                # XXX do we need that if explicit filename is provided?
                urllib.urlcleanup()

        # open cached file
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh

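# A minimal usage sketch for AptListsCache.get() above (the URL is
# illustrative only):
#
#   cache = AptListsCache(cachedir='build/cache')
#   fh = cache.get('http://neuro.debian.net/debian/dists/sid/main/'
#                  'binary-i386/Packages.bz2')
#   for stanza in deb822.Packages.iter_paragraphs(fh):
#       print stanza['Package']
#   fh.close()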

def add_pkgfromtaskfile(db, urls):
    cache = AptListsCache()
    pkgs = []

    for task in urls:
        fh = cache.get(task)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if stanza.has_key('Depends'):
                pkg = stanza['Depends']
            elif stanza.has_key('Recommends'):
                pkg = stanza['Recommends']
            elif stanza.has_key('Suggests'):
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            if pkg.count(','):
                pkgs += [p.strip() for p in pkg.split(',')]
            else:
                pkgs.append(pkg.strip())

    for p in pkgs:
        if not db.has_key(p):
            db[p] = get_emptydbentry()

    return db

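# Sketch of the field handling in add_pkgfromtaskfile() above: a hypothetical
# task-file stanza
#
#   Depends: python-nifti, python-nibabel
#
# contributes ['python-nifti', 'python-nibabel'] to pkgs, and each name gets
# an empty db entry unless it is already known.
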
def get_emptydbentry():
    return {'main': {}}

def import_blendstask(cfg, db, url):
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
            pkg = st['Depends']
        elif st.has_key('Recommends'):
            pkg = st['Recommends']
        elif st.has_key('Suggests'):
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # pkg description
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # charge the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # Publications
            if st.has_key('Published-Title'):
                title = st['Published-Title']
                if title[-1] == '.':
                    # strip trailing dot -- added later
                    pub = {'title': title[:-1]}
                else:
                    pub = {'title': title}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']

                db[p]['main']['publication'] = pub

            # Registration
            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            # Remarks
            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

            # handle pkg name aliases
            if p in cfg.options('blend package aliases'):
                src_entry = db[p].copy()
                # remove original entry
                del db[p]
                # copy the entry into all aliases
                for alias in cfg.get('blend package aliases', p).split():
                    print "Aliasing %s to %s" % (p, alias)
                    db[alias] = copy.deepcopy(src_entry)

    return db

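# The alias handling at the end of import_blendstask() expects a config
# section like the following (names hypothetical); the original entry is
# removed and deep-copied into every alias:
#
#   [blend package aliases]
#   python-nifti = python-nifti-snapshot pynifti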

def get_releaseinfo(rurl):
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([rp['Label'], rp['Codename']])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}

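# For a hypothetical Release file containing
#
#   Origin: NeuroDebian
#   Label: NeuroDebian
#   Codename: sid
#   Architectures: i386 amd64
#   Components: main contrib non-free
#
# get_releaseinfo() returns archs == ['i386', 'amd64'], components ==
# ['main', 'contrib', 'non-free'] and labelcode == 'NeuroDebian_sid'.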

def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])

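# E.g. build_pkgsurl('http://example.org/debian/dists/sid', 'main', 'i386')
# yields 'http://example.org/debian/dists/sid/main/binary-i386/Packages.bz2'.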

def import_release(cfg, db, rurl):
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db

def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """
    :Parameter:
      st: Package section
    """
    pkg = st['Package']

    # only care for known packages
    if not db.has_key(pkg):
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if not st['Architecture'] in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # charge the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']

    return db


def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename

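# Assuming a config section like (description hypothetical)
#
#   [release codenames]
#   sid = Debian testing/unstable
#
# trans_codename('sid', cfg) returns 'Debian testing/unstable', while any
# unlisted codename is passed through unchanged.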

def create_dir(path):
    if os.path.exists(path):
        return

    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
            os.mkdir(p)

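# create_dir() behaves like `mkdir -p` for relative paths, e.g.
# create_dir('build/cache') creates 'build' and then 'build/cache',
# skipping components that already exist.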

def dde_get(url, fail=False):
    # enforce delay to be friendly to DDE
    time.sleep(3)
    try:
        data = json.load(urllib2.urlopen(url+"?t=json"))['r']
        print "SUCCESS:", url
        return data
    except urllib2.HTTPError, e:
        print "NOINFO:", url, type(e)
        return False
    except urllib2.URLError, e:
        print "URLERROR:", url, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return dde_get(url, fail=True)
    except StopIteration:
        print "NOINFO:", url
        return False
    except Exception, e:
        print "UDD-DOWN?:", url, type(e)
        return False

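# Usage sketch for dde_get(); the URL shape is whatever the config's
# [dde] pkgquery_url points at (the one below is purely illustrative):
#
#   q = dde_get('http://dde.example.org/packages/all/python-nifti')
#   if q:
#       print q.get('popcon')
#
# HTTP errors return False immediately; URL errors trigger one 30 second
# retry before giving up.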

def nitrc_get(spec, fail=False):
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    try:
        # change into this from python 2.6 on
        #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        data = json.load(urllib2.urlopen(nitrc_url + '?spec=%s' % spec))
        print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError, e:
        print "NITRC-NOINFO:", spec, type(e)
        return False
    except urllib2.URLError, e:
        print "NITRC-URLERROR:", spec, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return nitrc_get(spec, fail=True)
    return data


def parse_nitrc(data):
    if data is False:
        return None
    # simplify -- there is only one project in the data
    project = data['projects'][0]
    nitrc_filtered = {'downloads': 0,
                      'id': project['id']}
    for pkg in project['packages']:
        for release in pkg['releases']:
            for fl in release['files']:
                nitrc_filtered['downloads'] += fl['download_count']
    return nitrc_filtered

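# parse_nitrc() collapses the download counts of all files of all releases
# of all packages of the (single) project into one number, e.g. (values
# hypothetical): {'downloads': 1234, 'id': 42}.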

def import_nitrc(cfg, db):
    for p in db.keys():
        if not cfg.has_option("nitrc ids", p):
            continue
        nitrc_spec = cfg.get("nitrc ids", p)
        nitrc_data = nitrc_get(nitrc_spec)
        nitrc_excerpt = parse_nitrc(nitrc_data)
        if nitrc_excerpt is not None:
            db[p]['nitrc'] = nitrc_excerpt
    return db

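# import_nitrc() expects a config section mapping package names to NITRC
# project specs (value hypothetical):
#
#   [nitrc ids]
#   psychopy = psychopy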

def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use origin field itself, since it is None for a few
            # packages, e.g. python-nifti
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-precise/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'." \
                        % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = version_compare(cp['version'],
                                       info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions
                else:
                    pass

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db

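# Note on the version handling above: apt_pkg.version_compare(a, b) follows
# cmp() semantics for Debian version strings -- negative if a < b, zero if
# equal, positive if a > b, e.g. version_compare('1.0-1', '1.0-2') < 0.
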
def assure_unicode(s):
    """Assure that argument is unicode

    Necessary if strings do not carry the Pythonish 'u' prefix to signal
    unicode, but are in fact UTF8-encoded
    """
    if type(s) is unicode:
        return s
    elif type(s) is str:
        # attempt regular unicode call and if fails -- just decode it
        # into utf8
        try:
            return unicode(s)
        except UnicodeDecodeError, e:
            return s.decode('utf8')
    else:
        return assure_unicode(str(s))

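# E.g. assure_unicode('caf\xc3\xa9') decodes the raw UTF8 bytes to u'caf\xe9',
# while plain ASCII str and existing unicode objects pass through unchanged;
# non-string objects are first converted via str().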

def convert_longdescr(ld):
    """Convert a Debian-style long description into reST markup.

    yoh: I think all this long description conversion will keep giving
    us problems, since per se there are no strict regulations,
    especially in blends files
    """
    descr = u''
    ld = ld.replace('% ', '%% ')
    ld = ld.replace(r'\t', '    ') # just in case assuming tab 4
    ld = ld.split('\n')
    re_leadblanks = re.compile("^ *")
    re_itemized = re.compile("^[-o*+] +")
    re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$")
    re_description_gr = re.compile("^( *[^-]+ - )(.*?)$")

    def unwrap_lines(lines):
        out = []
        indent_levels = [-1]
        for l in lines:
            match = re_itemized_gr.search(l).groups()
            if ((len(match[0]) in indent_levels and match[1] is None)
                or (len(match[0]) > max(indent_levels)+4)) \
                and match[2].strip() != '.':
                # append to previous
                if not out[-1].endswith(" "):
                    out[-1] += " "
                out[-1] += match[2]
            else:
                out.append(l)

            indent_levels = [len(match[0])]
            if match[1] is not None:
                indent_levels += [len(match[0]) + len(match[1])]
            if match[2].strip() == '.':
                # reset though if '.'
                indent_levels = [-1]
        return out

    def dedent_withlevel(lines):
        """Dedent `lines` given as a list; return the dedented lines
        and by how much they were dedented.
        """
        nleading = min([re_leadblanks.search(l).span()[1]
                        for l in lines])
        return [l[nleading:] for l in lines], nleading

    def block_lines(ld, level=0):
        # so we got a list of lines
        # dedent all of them first
        ld, level = dedent_withlevel(ld)

        # let's collect them in blocks/paragraphs
        # 1. into paragraphs split by '.'
        blocks, block = [], None

        # next block can begin if
        #  1.  . line
        #  2. it was an itemized list and all items begin with
        #     the same symbol or get further indented accordingly
        #     so let's first check if it is an itemized list
        itemized_match = re_itemized.search(ld[0])
        if itemized_match:
            allow_indents = " "*itemized_match.span()[1]
        else:
            allow_indents = None
        for l in ld:
            if block is None or l.strip() == '.' \
                   or (len(l) and ( len(block) and (
                (l.startswith(' ') and not block[-1].startswith(' '))
                or
                (not l.startswith(' ') and block[-1].startswith(' '))))):
                block = []
                blocks.append(block)
            if l.strip() != '.':
                block.append(l)
        if len(blocks) == 1:
            return blocks[0]
        else:
            return [block_lines(b, level+1) for b in blocks if len(b)]

    def blocks_to_rst(bls, level=0):
        # check if this block is an itemized beast
        #itemized_match = re_itemized_gr.search(bls[0][0])
        #if itemized_match:
        #    res += ' 'allow_indents = " "*itemized_match.span()[1]
        out = ''
        for b in bls:
            if isinstance(b, list):
                if len(b) == 1:
                    out += " "*level + b[0] + '\n\n'
                else:
                    out += blocks_to_rst(b, level+1)
            else:
                e = " "*level + b + '\n'
                if not re_itemized.search(b):
                    pass
                    #e += '\n'
                elif len(e) and e[0] == ' ':
                    # strip 1 leading blank
                    e = e[1:]
                out += e
        out += '\n'
        return out

    ld = unwrap_lines(ld)
    bls = block_lines(ld)
    return blocks_to_rst(bls)

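# Illustrative sketch of the intended transformation (the exact output
# depends on the unwrapping/blocking heuristics above):
#
#   input (Debian control format)     output (reST-ish)
#    This package does X.              This package does X.
#    .
#    Features:                         Features:
#     - fast                           - fast
#     - simple                         - simple
#
# i.e. '.' separator lines turn into paragraph breaks, wrapped lines are
# rejoined, and itemized lines keep their bullets.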

def underline_text(text, symbol):
    underline = symbol * len(text)
    return '%s\n%s\n' % (text, underline)

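# E.g. underline_text('Software packages', '=') yields:
#
#   Software packages
#   =================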
693
694 def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
695     # local binding for ease of use
696     pkgdb = db[pkg]
697     # do nothing if there is not at least the very basic stuff
698     if not pkgdb['main'].has_key('description'):
699         return
700     title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
701     title = underline_text(title, '*')
702
703     ex_dir = None
704     if 'sv' in pkgdb['main']:
705         ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
706         if not os.path.exists(ex_dir):
707             ex_dir = None
708     long_description = 'Description missing'
709     if 'long_description' in pkgdb['main']:
710         long_description=convert_longdescr(
711                     assure_unicode(pkgdb['main']['long_description']))
712     page = template.render(
713             pkg=pkg,
714             title=title,
715             long_description=long_description,
716             cfg=cfg,
717             db=pkgdb,
718             fulldb=db,
719             extracts_dir=ex_dir,
720             op=os.path)
721     # the following can be replaced by something like
722     # {% include "sidebar.html" ignore missing %}
723     # in the template whenever jinja 2.2 becomes available
724     addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
725     if os.path.exists(addenum):
726         page += '\n\n.. include:: %s\n' % addenum
727     return page
728

def store_db(db, filename):
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
    db = eval(f.read())
    return db

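# The "database" is just a pretty-printed Python dict literal that is read
# back via eval(), so the two functions round-trip:
#
#   store_db({'pkg': {'main': {}}}, '/tmp/test.db')
#   assert read_db('/tmp/test.db') == {'pkg': {'main': {}}}
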
def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'lists'))

    repos = {}
    for release in cfg.options('release codenames'):
        if release == 'data':
            # no separate list for the data archive
            continue
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = '%s.%s' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, 'lists', listname), 'w')
            for rel in ('data', release):
                aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                                                            rel)
                lf.write('deb %s' % aptcfg)
                lf.write('#deb-src %s' % aptcfg)
            lf.close()

    id2codename = dict([(cfg.get('release backport ids', r), r)
                            for r in cfg.options('release codenames')])
    id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
                            for r in cfg.options('release codenames')])
    mirror2name = dict([(m, cfg.get('mirror names', m))
                            for m in cfg.options('mirrors')])
    mirror2url = dict([(m, cfg.get('mirrors', m))
                            for m in cfg.options('mirrors')])
    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(id2codename=id2codename,
                                     id2relname=id2relname,
                                     mirror2name=mirror2name,
                                     mirror2url=mirror2url))
    sl.close()

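# Each generated list file carries a 'deb' line and a commented 'deb-src'
# line for the data archive plus the release itself, e.g. for a mirror URL
# 'http://mirror.example.org/debian' (hypothetical) and release 'sid':
#
#   deb http://mirror.example.org/debian data main contrib non-free
#   #deb-src http://mirror.example.org/debian data main contrib non-free
#   deb http://mirror.example.org/debian sid main contrib non-free
#   #deb-src http://mirror.example.org/debian sid main contrib non-free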

def sort_by_tasks(db):
    tasks = {}
    for pkg in db.keys():
        if not 'blends' in db[pkg]:
            # no blend info
            continue
        blendinfo = db[pkg]['blends']
        if not 'tasks' in blendinfo:
            # no task info in blend data
            continue
        taskinfo = blendinfo['tasks']
        for task in taskinfo:
            taskname = task[1]
            if not taskname in tasks:
                tasks[taskname] = []
            tasks[taskname].append(pkg)
    return tasks


def sort_by_maintainer(db):
    maints = {}
    maint_ids = {}
    for pkg in db.keys():
        maint = None
        pkginfo = db[pkg]
        # start with the blends info
        if 'blends' in pkginfo and 'responsible' in pkginfo['blends']:
            maint = pkginfo['blends']['responsible']
        if not 'main' in db[pkg] and maint is None:
            # no info
            continue
        info = db[pkg]['main']
        if not 'maintainer' in info and maint is None:
            # no maintainer info
            continue
        if 'original_maintainer' in info and info['original_maintainer'] is not None:
            maint = info['original_maintainer']
        elif 'maintainer' in info and info['maintainer'] is not None:
            maint = info['maintainer']
        if maint is None:
            # no sane maintainer info
            continue
        # safeguard: <> confuses sphinx and we don't care about different emails
        if '<' in maint:
            maint = maint[:maint.find('<')]
        maint = maint.strip()
        # kick out non-ascii ones (should not be, but too tired to find the bug)
        try:
            codecs.ascii_decode(maint)
        except (UnicodeDecodeError, UnicodeEncodeError):
            continue
        if not maint.lower() in maints:
            maints[maint.lower()] = []
            maint_ids[maint.lower()] = [maint]
        else:
            maint_ids[maint.lower()].append(maint)
        maints[maint.lower()].append(pkg)
    # remove duplicates
    out = {}
    for m in maints:
        out[maint_ids[m][0]] = np.unique(maints[m])
    return out


def sort_by_release(db):
    rels = {}
    for pkg in db.keys():
        pkginfo = db[pkg]
        for sec in pkginfo:
            if not isinstance(sec, tuple):
                # only releases are of interest
                continue
            relname = sec[0]
            if not relname in rels:
                rels[relname] = []
            rels[relname].append(pkg)
    # remove duplicates
    for r in rels:
        rels[r] = np.unique(rels[r])
    return rels


def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))
    create_dir(os.path.join(outdir, 'pkglists'))
    # template for individual package listings
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    # the high-level package list overview
    hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    hltoc.write('.. _pkglists:\n\n')
    hltoc.write(underline_text('Software packages', '='))
    defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
            (sort_by_release(db), 'By release', 'Packages for %s'),
            (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
    for def_ in defs:
        # TOC for each thingie
        pkgsdict, sectitle, title_tmpl = def_
        hltoc.write(underline_text(sectitle, '-'))
        ids = pkgsdict.keys()
        ids.sort()
        for id_ in ids:
            label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
            # filter out crap
            filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
            if not len(filtered_pkgs):
                continue
            plist = toc_template.render(
                        label=label,
                        title=underline_text(title_tmpl % id_, '='),
                        pkgs=filtered_pkgs,
                        db=db)
            toc = codecs.open(os.path.join(outdir,
                                           'pkglists',
                                           '%s.rst' % label),
                              'w', 'utf-8')
            toc.write(plist)
            toc.close()
            hltoc.write('* :ref:`%s`\n' % label)
        hltoc.write('\n\n')

    # now a complete list of all packages
    hltoc.write(underline_text('Complete list', '-'))
    toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
                      'w', 'utf-8')
    toc.write(toc_template.render(label='full_pkg_list',
                title=underline_text('Complete package list', '='),
                pkgs=db.keys(), db=db))
    toc.close()
    hltoc.write('* :ref:`full_pkg_list`\n')
    hltoc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
        # when no page is available skip this package
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()


def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="None")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None, help="None")

    op.add_option("--extracts", action="store", dest="extracts_dir",
                  type="string", default=None, help="None")


def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    if cmd == 'debug_ld':
        # load the db from file
        db = read_db(opts.db)

        for p in db.keys():
        #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']:
            if not 'long_description' in db[p]['main']:
                continue
            ld = db[p]['main']['long_description']

            print ">>>>>>>>> ", p
            #print ld
            print "----"
            print convert_longdescr(ld)
        raise SystemExit
    # load existing db, unless renew is requested
    if cmd == 'updatedb':
        db = {}
        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if not db.has_key(p):
                    db[p] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(cfg, db, url)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # get info from NITRC
        db = import_nitrc(cfg, db)
        # store the new DB
        store_db(db, opts.db)
        # and be done
        return

    # load the db from file
    db = read_db(opts.db)

    # fire up jinja
    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)

if __name__ == "__main__":
    main()
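
# Typical invocations (file names and the final command name hypothetical;
# any command other than 'updatedb' and 'debug_ld' falls through to page
# generation):
#
#   python dde.py --cfg neurodebian.cfg --db pkgs.db updatedb
#   python dde.py --cfg neurodebian.cfg --db pkgs.db -o build/source website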