2 """Tell me who you are!
7 from debian_bundle import deb822
11 # Let's first make sure no guarding (but annoying) warnings are emitted
13 warnings.simplefilter('ignore', FutureWarning)
14 warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
17 from ConfigParser import SafeConfigParser
18 from optparse import OptionParser, Option, OptionGroup, OptionConflictError
30 from jinja2 import Environment, PackageLoader
32 from pprint import PrettyPrinter
35 class AptListsCache(object):
36 def __init__(self, cachedir='build/cache',
39 self.cachedir = cachedir
41 if not ro_cachedirs is None:
42 self.ro_cachedirs = ro_cachedirs
44 self.ro_cachedirs = []
47 create_dir(self.cachedir)
49 def get(self, url, update=False):
50 """Looks in the cache if the file is there and takes the cached one.
51 Otherwise it is downloaded first.
53 Knows how to deal with http:// and svn:// URLs.
58 # look whether it is compressed
59 cext = url.split('.')[-1]
60 if cext in ['gz', 'bz2']:
61 target_url = url[:-1 * len(cext) -1]
63 # assume not compressed
67 # turn url into a filename -- mimik what APT does for
69 tfilename = '_'.join(target_url.split('/')[2:])
71 # if we need to download anyway do not search
73 cfilename = os.path.join(self.cachedir, tfilename)
75 # look for the uncompressed file anywhere in the cache
77 for cp in [self.cachedir] + self.ro_cachedirs:
78 if os.path.exists(os.path.join(cp, tfilename)):
79 cfilename = os.path.join(cp, tfilename)
84 cfilename = os.path.join(self.cachedir, tfilename)
87 # if updated needed -- download
89 #print 'Caching file from %s' % url
91 if url.startswith('svn://'):
93 pysvn.Client().export(url, cfilename)
94 if url.startswith('http://'):
96 tempfile, ignored = urllib.urlretrieve(url)
101 decompressor = 'gzip'
103 decompressor = 'bzip2'
108 "Don't know how to decompress %s files" \
111 if not decompressor is None:
112 if subprocess.call([decompressor, '-d', '-q', '-f',
114 raise RuntimeError, \
115 "Something went wrong while decompressing '%s'" \
118 # move decompressed file into cache
119 shutil.move(os.path.splitext(tempfile)[0], cfilename)
121 # XXX do we need that if explicit filename is provided?
125 fh = codecs.open(cfilename, 'r', 'utf-8')
130 def add_pkgfromtaskfile(db, urls):
# Seed *db* with empty entries for every package mentioned (as Depends/
# Recommends/Suggests) in the given Blends task files.
# NOTE(review): some source lines are elided from this listing.
131 cache = AptListsCache()
137 # loop over all stanzas
138 for stanza in deb822.Packages.iter_paragraphs(fh):
139 if stanza.has_key('Depends'):
140 pkg = stanza['Depends']
141 elif stanza.has_key('Recommends'):
142 pkg = stanza['Recommends']
143 elif stanza.has_key('Suggests'):
144 pkg = stanza['Suggests']
148 # account for multiple packages per line
150 pkgs += [p.strip() for p in pkg.split(',')]
152 pkgs.append(pkg.strip())
# only create an entry when the package is not known yet
155 if not db.has_key(p):
156 db[p] = get_emptydbentry()
160 def get_emptydbentry():
# Returns a fresh, empty per-package db entry skeleton.
# NOTE(review): function body not shown in this listing -- confirm against
# the full source before relying on the exact structure returned.
163 def import_blendstask(cfg, db, url):
# Parse one Blends task file and merge its per-package metadata (task
# membership, license, publication info, remarks, ...) into *db*.
# NOTE(review): some source lines are elided from this listing.
164 cache = AptListsCache()
168 # figure out blend's task page URL, since they differ from blend to blend
169 urlsec = url.split('/')
170 blendname = urlsec[-3]
171 if blendname == 'debian-med':
172 taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
173 elif blendname == 'debian-science':
174 taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
176 raise ValueError('Unknown blend "%s"' % blendname)
177 taskpage_url += urlsec[-1]
179 for st in deb822.Packages.iter_paragraphs(fh):
180 if st.has_key('Task'):
181 task_name = st['Task']
# task identity: (blend, task name, task page URL)
182 task = (blendname, task_name, taskpage_url)
184 if st.has_key('Depends'):
186 elif st.has_key('Recommends'):
187 pkg = st['Recommends']
188 elif st.has_key('Suggests'):
191 # print 'Warning: Cannot determine name of prospective package ' \
192 # '... ignoring. Dump follows:'
196 # take care of pkg lists
197 for p in pkg.split(', '):
198 if not db.has_key(p):
# unknown packages are reported and skipped, not added
199 print 'Ignoring blend package "%s"' % p
205 info['tasks'] = [task]
206 if st.has_key('License'):
207 info['license'] = st['License']
208 if st.has_key('Responsible'):
209 info['responsible'] = st['Responsible']
212 if st.has_key('Pkg-Description'):
# first line is the short description, the rest the long one
213 descr = st['Pkg-Description'].split('\n')
214 info['description'] = descr[0].strip()
215 info['long_description'] = \
216 u'\n'.join(descr[1:])
218 # charge the basic property set
219 db[p]['main']['description'] = info['description']
220 db[p]['main']['long_description'] = info['long_description']
221 if st.has_key('WNPP'):
222 db[p]['main']['debian_itp'] = st['WNPP']
223 if st.has_key('Pkg-URL'):
224 db[p]['main']['other_pkg'] = st['Pkg-URL']
225 if st.has_key('Homepage'):
226 db[p]['main']['homepage'] = st['Homepage']
229 if st.has_key('Published-Title'):
230 title = st['Published-Title']
232 # strip trailing dot -- added later
233 pub = {'title': title[:-1]}
235 pub = {'title': title}
236 if st.has_key('Published-Authors'):
237 pub['authors'] = st['Published-Authors']
238 if st.has_key('Published-Year'):
239 pub['year'] = st['Published-Year']
240 if st.has_key('Published-In'):
241 pub['in'] = st['Published-In']
242 if st.has_key('Published-URL'):
243 pub['url'] = st['Published-URL']
244 if st.has_key('Published-DOI'):
245 pub['doi'] = st['Published-DOI']
246 # need at least one URL
247 if not pub.has_key('url'):
# fall back to a DOI resolver URL when no explicit URL was given
248 pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']
250 db[p]['main']['publication'] = pub
253 if st.has_key('Registration'):
254 db[p]['main']['registration'] = st['Registration']
257 if st.has_key('Remark'):
258 # prepend a single space to make it look like a long description
259 info['remark'] = convert_longdescr(' ' + st['Remark'])
261 # only store if there isn't something already
262 if not db[p].has_key('blends'):
263 db[p]['blends'] = info
265 # just add this task's name and id
266 db[p]['blends']['tasks'].append(task)
268 # handle pkg name aliases
269 if p in cfg.options('blend package aliases'):
270 src_entry = db[p].copy()
271 # remove original entry
273 # copy the entry into all aliases
274 for alias in cfg.get('blend package aliases', p).split():
275 print "Aliasing %s to %s" % (p, alias)
276 db[alias] = copy.deepcopy(src_entry)
281 def get_releaseinfo(rurl):
# Fetch and parse an APT Release file and return a dict describing the
# repository (base URL, architectures, components, codename/label codes).
# NOTE(review): some source lines are elided from this listing.
282 cache = AptListsCache()
283 # root URL of the repository
284 baseurl = '/'.join(rurl.split('/')[:-1])
285 # get the release file from the cache
286 release_file = cache.get(rurl)
288 # create parser instance
289 rp = deb822.Release(release_file)
291 # architectures on this dist
292 archs = rp['Architectures'].split()
293 components = rp['Components'].split()
294 # compile a new codename that also considers the repository label
295 # to distinguish between official and unofficial repos.
297 origin = rp['Origin']
298 codename = rp['Codename']
299 labelcode = '_'.join([rp['Label'], rp['Codename']])
304 return {'baseurl': baseurl, 'archs': archs, 'components': components,
305 'codename': codename, 'label': label, 'labelcode': labelcode,
def build_pkgsurl(baseurl, component, arch):
    """Compose the URL of the bzip2-compressed Packages index for one
    component/architecture of an APT repository.

    E.g. build_pkgsurl('http://host/dists/sid', 'main', 'i386') ->
    'http://host/dists/sid/main/binary-i386/Packages.bz2'
    """
    # restored from the mangled listing: line-number residue stripped,
    # indentation reinstated; logic unchanged
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])
313 def import_release(cfg, db, rurl):
# Import package stanzas from every component/arch Packages file of the
# release described by Release-file URL *rurl* into *db*.
# NOTE(review): some source lines are elided from this listing.
314 cache = AptListsCache()
316 ri = get_releaseinfo(rurl)
318 # compile the list of Packages files to parse and parse them
319 for c in ri['components']:
320 for a in ri['archs']:
321 # compile packages URL
322 pkgsurl = build_pkgsurl(ri['baseurl'], c, a)
324 # retrieve from cache
325 packages_file = cache.get(pkgsurl)
# merge each stanza into the db
328 for stanza in deb822.Packages.iter_paragraphs(packages_file):
329 db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])
332 packages_file.close()
336 def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
# Merge one Packages stanza *st* into *db* under a per-release dist key,
# and refresh the package's basic 'main' properties.
# NOTE(review): some source lines are elided from this listing.
343 # only care for known packages
344 if not db.has_key(pkg):
345 # print 'Ignoring NeuroDebian package "%s"' % pkg
# dist key: (human-readable release name, 'neurodebian-<codename>')
348 distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)
350 if db[pkg].has_key(distkey):
351 info = db[pkg][distkey]
353 info = {'architecture': []}
# record each architecture only once
356 if not st['Architecture'] in info['architecture']:
357 info['architecture'].append(st['Architecture'])
358 info['maintainer'] = st['Maintainer']
359 if st.has_key('Homepage'):
360 info['homepage'] = st['Homepage']
361 info['version'] = st['Version']
364 info['distribution'] = origin
365 info['release'] = codename
366 info['component'] = component
369 info['poolurl'] = '/'.join([os.path.dirname(st['Filename'])])
# escape '%' so the description survives later %-formatting
372 descr = st['Description'].replace('%', '%%').split('\n')
373 info['description'] = descr[0].strip()
374 info['long_description'] = u'\n'.join(descr[1:])
376 db[pkg][distkey] = info
378 # charge the basic property set
379 db[pkg]['main']['description'] = info['description']
380 db[pkg]['main']['long_description'] = info['long_description']
381 if st.has_key('Source'):
# 'sv' is "<source package> <version>"; fall back to the binary name
382 db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
384 db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
385 if st.has_key('Homepage'):
386 db[pkg]['main']['homepage'] = st['Homepage']
387 if st.has_key('Recommends'):
388 db[pkg]['main']['recommends'] = st['Recommends']
393 def trans_codename(codename, cfg):
394 """Translate a known codename into a release description.
396 Unknown codenames will simply be returned as is.
"""
398 # if we know something, tell
399 if codename in cfg.options('release codenames'):
400 return cfg.get('release codenames', codename)
405 def create_dir(path):
# Create *path* including all missing parent directories (mkdir -p style);
# no-op when the path already exists.
# NOTE(review): some source lines are elided from this listing.
406 if os.path.exists(path):
409 ps = path.split(os.path.sep)
# walk from the top, creating each missing path component in turn
411 for i in range(1,len(ps) + 1):
412 p = os.path.sep.join(ps[:i])
414 if not os.path.exists(p):
418 def dde_get(url, fail=False):
# Query the DDE (Debian Data Export) JSON API for *url* and return the
# payload under key 'r'. On transient failure it retries once (fail=True
# marks the retry); visible handlers cover HTTP/URL errors and bad JSON.
# NOTE(review): some source lines are elided from this listing.
419 # enforce delay to be friendly to DDE
422 data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
423 print "SUCCESS:", url
425 except urllib2.HTTPError, e:
426 print "NOINFO:", url, type(e)
428 except urllib2.URLError, e:
429 print "URLERROR:", url, type(e)
431 print "Permanant failure"
433 print "Try again after 30 seconds..."
# single self-recursive retry; fail=True prevents endless retrying
435 return dde_get(url, fail=True)
436 except (StopIteration):
439 except json.ReadException, e:
440 print "UDD-DOWN?:", url, type(e)
444 def nitrc_get(spec, fail=False):
# Fetch project metadata for *spec* from the NITRC JSON export; mirrors
# dde_get()'s error handling including a single retry on URL errors.
# NOTE(review): some source lines are elided from this listing.
445 nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
447 # change into this from python 2.6 on
448 #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
449 data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
450 print "NITRC-SUCCESS:", spec
451 except urllib2.HTTPError, e:
452 print "NITRC-NOINFO:", spec, type(e)
454 except urllib2.URLError, e:
455 print "NITRC-URLERROR:", spec, type(e)
457 print "Permanant failure"
459 print "Try again after 30 seconds..."
# single self-recursive retry; fail=True prevents endless retrying
461 return nitrc_get(spec, fail=True)
465 def parse_nitrc(data):
# Reduce a NITRC project export to a small dict; visible code sums the
# download counts of all files across all package releases.
# NOTE(review): some source lines are elided from this listing (including
# part of the nitrc_filtered dict literal).
468 # simplify -- there is only one project in the data
469 project = data['projects'][0]
470 nitrc_filtered = {'downloads': 0,
472 for pkg in project['packages']:
473 for release in pkg['releases']:
474 for file in release['files']:
475 nitrc_filtered['downloads'] += file['download_count']
476 return nitrc_filtered
479 def import_nitrc(cfg, db):
# For every package with a configured NITRC id, fetch its NITRC stats and
# attach them to the db entry under 'nitrc'.
# NOTE(review): the enclosing loop header is elided from this listing.
481 if not cfg.has_option("nitrc ids", p):
483 nitrc_spec = cfg.get("nitrc ids", p)
484 nitrc_data = nitrc_get(nitrc_spec)
485 nitrc_excerpt = parse_nitrc(nitrc_data)
486 if not nitrc_excerpt is None:
487 db[p]['nitrc'] = nitrc_excerpt
491 def import_dde(cfg, db):
# Enrich *db* with data from DDE/UDD: popcon figures for Debian and Ubuntu
# plus per-release package info (version/architecture accumulation).
# NOTE(review): some source lines are elided from this listing.
492 query_url = cfg.get('dde', 'pkgquery_url')
495 q = dde_get(query_url + "/packages/all/%s" % p)
497 # copy all stuff, while preserving non-overlapping information
498 for k, v in q.iteritems():
500 # get latest popcon info for debian and ubuntu
501 # cannot use origin field itself, since it is none for few packages
# first token of 'drc' (distribution/release/component) names the origin
503 origin = q['drc'].split()[0]
504 if origin == 'ubuntu':
505 if q.has_key('popcon'):
506 db[p]['main']['ubuntu_popcon'] = q['popcon']
507 # if we have ubuntu, need to get debian
508 q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
509 if q and q.has_key('popcon'):
510 db[p]['main']['debian_popcon'] = q['popcon']
511 elif origin == 'debian':
512 if q.has_key('popcon'):
513 db[p]['main']['debian_popcon'] = q['popcon']
514 # if we have debian, need to get ubuntu
515 q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
516 if q and q.has_key('popcon'):
517 db[p]['main']['ubuntu_popcon'] = q['popcon']
519 print("Ignoring unkown origin '%s' for package '%s'." \
522 # now get info for package from all releases in UDD
523 q = dde_get(query_url + "/dist/p:%s" % p)
526 # hold all info about this package per distribution release
529 distkey = (trans_codename(cp['release'], cfg),
530 "%s-%s" % (cp['distribution'], cp['release']))
531 if not info.has_key(distkey):
533 # turn into a list to append others later
534 info[distkey]['architecture'] = [info[distkey]['architecture']]
535 # accumulate data for multiple over archs
# compare candidate version against the one stored so far
537 comp = apt.VersionCompare(cp['version'],
538 info[distkey]['version'])
539 # found another arch for the same version
541 info[distkey]['architecture'].append(cp['architecture'])
542 # found newer version, dump the old ones
545 # turn into a list to append others later
546 info[distkey]['architecture'] = [info[distkey]['architecture']]
547 # simply ignore older versions
551 # finally assign the new package data
552 for k, v in info.iteritems():
557 def assure_unicode(s):
558 """Assure that argument is unicode
560 Necessary if strings are not carrying out Pythonish 'u' prefix to
561 signal UTF8 strings, but are in fact UTF8
"""
# already unicode -- nothing to do
563 if type(s) is unicode:
566 # attempt regular unicode call and if fails -- just decode it
570 except UnicodeDecodeError, e:
571 return s.decode('utf8')
# non-string input: stringify first, then recurse to decode
573 return assure_unicode(str(s))
576 def convert_longdescr(ld):
# Convert a Debian-style long description into ReST-friendly text:
# paragraph breaks and embedded lists via a temporary marker token, with
# ReST-active characters escaped at the end.
# NOTE(review): some source lines are elided from this listing (including
# the line splitting and the return statement).
577 ld = ld.replace('% ', '%% ')
579 for i, l in enumerate(ld):
# blank separator line becomes a paragraph-break marker
581 ld[i] = ' #NEWLINEMARKER#'
582 # look for embedded lists
583 elif len(l) >=3 and l[:2] == ' ' and l[2] in '-*':
584 ld[i] = ' #NEWLINEMARKER# ' + l[2:]
586 ld = u' '.join([l[1:] for l in ld])
587 ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
588 # cleanup any leftover (e.g. trailing markers)
589 ld = ld.replace('#NEWLINEMARKER#', '')
590 # safe-guard ReST active symbols
591 ld = re.sub(r'([\'`*])', r'\\\1', ld)
def underline_text(text, symbol):
    """Return *text* as a ReST-style heading: the text, a newline, and an
    underline of *symbol* repeated to the text's length, ending in a newline.
    """
    # restored from the mangled listing: line-number residue stripped,
    # indentation reinstated; logic unchanged
    underline = symbol * len(text)
    return '%s\n%s\n' % (text, underline)
600 def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
# Render the ReST page for one package from the jinja template; appends a
# per-package addendum include when such a file exists.
# NOTE(review): some source lines are elided from this listing.
601 # local binding for ease of use
603 # do nothing if there is not at least the very basic stuff
604 if not pkgdb['main'].has_key('description'):
606 title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
607 title = underline_text(title, '*')
# 'sv' is "<source> <version>"; its source name locates the extracts dir
610 if 'sv' in pkgdb['main']:
611 ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
612 if not os.path.exists(ex_dir):
614 page = template.render(
617 long_description=convert_longdescr(
618 assure_unicode(pkgdb['main']['long_description'])),
624 # the following can be replaced by something like
625 # {% include "sidebar.html" ignore missing %}
626 # in the template whenever jinja 2.2 becomes available
627 addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
628 if os.path.exists(addenum):
629 page += '\n\n.. include:: %s\n' % addenum
def store_db(db, filename):
    """Write the package database *db* to *filename* as a pretty-printed
    Python literal, UTF-8 encoded.

    The visible companion read_db() is expected to parse this format back.
    """
    # restored from the mangled listing: line-number residue stripped,
    # indentation reinstated
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    try:
        f.write(pp.pformat(db))
    finally:
        # always release the file handle, even if formatting/writing fails
        # (the visible span of the original never closed it)
        f.close()
640 def read_db(filename):
# Load a package database previously written by store_db() from a UTF-8
# encoded file.
# NOTE(review): the parsing/return lines are elided from this listing.
641 f = codecs.open(filename, 'r', 'utf-8')
645 def write_sourceslist(jinja_env, cfg, outdir):
# Generate per-release/per-mirror apt sources.list snippets plus the
# rendered sources_lists overview page.
# NOTE(review): some source lines are elided from this listing.
647 create_dir(os.path.join(outdir, 'lists'))
650 for release in cfg.options('release codenames'):
651 if release == 'data':
652 # no separate list for the data archive
654 transrel = trans_codename(release, cfg)
656 for mirror in cfg.options('mirrors'):
# one list file per (release, mirror) pair
657 listname = '%s.%s' % (release, mirror)
658 repos[transrel].append((mirror, listname))
659 lf = open(os.path.join(outdir, 'lists', listname), 'w')
# each list carries the 'data' archive plus the release itself
660 for rel in ('data', release):
661 aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
663 lf.write('deb %s' % aptcfg)
664 lf.write('#deb-src %s' % aptcfg)
# lookup tables handed to the template
667 id2codename = dict([(cfg.get('release backport ids', r), r)
668 for r in cfg.options('release codenames')])
669 id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
670 for r in cfg.options('release codenames')])
671 mirror2name = dict([(m, cfg.get('mirror names', m))
672 for m in cfg.options('mirrors')])
673 mirror2url = dict([(m, cfg.get('mirrors', m))
674 for m in cfg.options('mirrors')])
675 srclist_template = jinja_env.get_template('sources_lists.rst')
676 sl = open(os.path.join(outdir, 'sources_lists'), 'w')
677 sl.write(srclist_template.render(id2codename=id2codename,
678 id2relname=id2relname,
679 mirror2name=mirror2name,
680 mirror2url=mirror2url))
684 def sort_by_tasks(db):
# Group package names by the blend task(s) they belong to; packages
# without blend/task info are skipped.
# NOTE(review): some source lines are elided from this listing.
686 for pkg in db.keys():
687 if not 'blends' in db[pkg]:
690 blendinfo = db[pkg]['blends']
691 if not 'tasks' in blendinfo:
692 # no task info in blend data
694 taskinfo = blendinfo['tasks']
695 for task in taskinfo:
697 if not taskname in tasks:
700 tasks[taskname].append(pkg)
704 def sort_by_maintainer(db):
# Group package names by maintainer name (original maintainer preferred),
# de-duplicating each group at the end.
# NOTE(review): some source lines are elided from this listing.
706 for pkg in db.keys():
707 if not 'main' in db[pkg]:
710 info = db[pkg]['main']
711 if not 'maintainer' in info:
714 if 'original_maintainer' in info and not info['original_maintainer'] is None:
715 maint = info['original_maintainer']
717 maint = info['maintainer']
719 # no sane maintainer info
721 # safeguard: <> confuses sphinx and we don't care about different emails
722 maint = maint[:maint.find('<')].strip()
723 if not maint in maints:
726 maints[maint].append(pkg)
# remove duplicate package entries per maintainer
729 maints[m] = np.unique(maints[m])
733 def sort_by_release(db):
# Group package names by release; release sections are identified by
# their tuple-typed dist keys, de-duplicated at the end.
# NOTE(review): some source lines are elided from this listing.
735 for pkg in db.keys():
738 if not isinstance(sec, tuple):
739 # only releases are of interest
742 if not relname in rels:
745 rels[relname].append(pkg)
# remove duplicate package entries per release
748 rels[r] = np.unique(rels[r])
752 def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
# Write the full ReST package documentation tree: grouped TOC pages
# (by task/release/maintainer), a complete package list, and one page
# per package.
# NOTE(review): some source lines are elided from this listing.
754 create_dir(os.path.join(outdir, 'pkgs'))
755 create_dir(os.path.join(outdir, 'pkglists'))
756 # template for individual package listings
757 toc_template = jinja_env.get_template('pkgs_toc.rst')
758 # the high-level package list overview
759 hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
760 hltoc.write('.. _pkglists:\n\n')
761 hltoc.write(underline_text('Software packages', '='))
762 defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
763 (sort_by_release(db), 'By release', 'Packages for %s'),
764 (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
766 # TOC for each thingie
767 pkgsdict, sectitle, title_tmpl = def_
768 hltoc.write(underline_text(sectitle, '-'))
769 ids = pkgsdict.keys()
770 for id_ in np.unique(ids):
# sphinx label: lowercase, spaces and slashes replaced by underscores
771 label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
772 if not len(pkgsdict[id_]):
775 plist = toc_template.render(
777 title=underline_text(title_tmpl % id_, '='),
782 toc = codecs.open(os.path.join(outdir,
786 toc.write(toc_template.render(
788 title=underline_text(title_tmpl % id_, '='),
792 except jinja2.exceptions.UndefinedError:
795 hltoc.write('* :ref:`%s`\n' % label)
799 # now a complete list of all packages
800 hltoc.write(underline_text('Complete list', '-'))
801 toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
803 toc.write(toc_template.render(label='full_pkg_list',
804 title=underline_text('Complete package list', '='),
805 pkgs=db.keys(), db=db))
807 hltoc.write('* :ref:`full_pkg_list`\n')
810 # and now each individual package page
811 pkg_template = jinja_env.get_template('pkg.rst')
813 page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
814 # when no page is available skip this package
817 pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
822 def prepOptParser(op):
# Register all command-line options on the given OptionParser.
# NOTE(review): some source lines are elided from this listing.
823 # use module docstring for help output
824 op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__
826 op.add_option("--db",
827 action="store", type="string", dest="db",
829 help="Database file to read. Default: None")
831 op.add_option("--cfg",
832 action="store", type="string", dest="cfg",
834 help="Repository config file.")
836 op.add_option("-o", "--outdir",
837 action="store", type="string", dest="outdir",
839 help="Target directory for ReST output. Default: None")
841 op.add_option("-r", "--release-url",
842 action="append", dest="release_urls",
845 op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
846 type="string", default=None, help="None")
848 op.add_option("--extracts", action="store", dest="extracts_dir",
849 type="string", default=None, help="None")
# Script entry point: parse options, then either rebuild the package db
# ('updatedb') or render the website output from an existing db.
# NOTE(review): some source lines are elided from this listing (including,
# presumably, the enclosing main() definition -- confirm in full source).
853 op = OptionParser(version="%prog 0.0.2")
856 (opts, args) = op.parse_args()
859 print('There needs to be exactly one command')
865 print("'--cfg' option is mandatory.")
868 print("'--db' option is mandatory.")
872 cfg = SafeConfigParser()
875 # load existing db, unless renew is requested
876 if cmd == 'updatedb':
# seed the db with packages named in the configured task files
878 if cfg.has_option('packages', 'select taskfiles'):
879 db = add_pkgfromtaskfile(db, cfg.get('packages',
880 'select taskfiles').split())
882 # add additional package names from config file
883 if cfg.has_option('packages', 'select names'):
884 for p in cfg.get('packages', 'select names').split():
885 if not db.has_key(p):
886 db[p] = get_emptydbentry()
888 # get info from task files
889 if cfg.has_option('packages', 'prospective'):
890 for url in cfg.get('packages', 'prospective').split():
891 db = import_blendstask(cfg, db, url)
893 # parse NeuroDebian repository
894 if cfg.has_option('neurodebian', 'releases'):
895 for rurl in cfg.get('neurodebian', 'releases').split():
896 db = import_release(cfg, db, rurl)
898 # collect package information from DDE
899 db = import_dde(cfg, db)
900 # get info from NITRC
901 db = import_nitrc(cfg, db)
903 store_db(db, opts.db)
907 # load the db from file
908 db = read_db(opts.db)
911 jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))
913 # generate package pages and TOC and write them to files
914 write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)
916 write_sourceslist(jinja_env, cfg, opts.outdir)
918 if __name__ == "__main__":