2 """Tell me who you are!
7 from debian_bundle import deb822
# Let's first assure no guarding (but annoying) warnings
13 warnings.simplefilter('ignore', FutureWarning)
14 warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
17 from ConfigParser import SafeConfigParser
18 from optparse import OptionParser, Option, OptionGroup, OptionConflictError
30 from jinja2 import Environment, PackageLoader
32 from pprint import PrettyPrinter
class AptListsCache(object):
    # Simple download cache for APT list files (Packages/Release) and
    # blends task files.  Fetched files live under `cachedir`; additional
    # read-only cache directories can be consulted via `ro_cachedirs`.
    def __init__(self, cachedir='build/cache',
        self.cachedir = cachedir

        if not ro_cachedirs is None:
            self.ro_cachedirs = ro_cachedirs
            self.ro_cachedirs = []

        # make sure the writable cache directory exists
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Looks in the cache if the file is there and takes the cached one.
        Otherwise it is downloaded first.

        Knows how to deal with http:// and svn:// URLs.
        # look whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            # strip the compression extension to obtain the target name
            target_url = url[:-1 * len(cext) -1]
            # assume not compressed

        # turn url into a filename -- mimic what APT does for
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        cfilename = os.path.join(self.cachedir, tfilename)

        # look for the uncompressed file anywhere in the cache
        for cp in [self.cachedir] + self.ro_cachedirs:
            if os.path.exists(os.path.join(cp, tfilename)):
                cfilename = os.path.join(cp, tfilename)
        cfilename = os.path.join(self.cachedir, tfilename)

        # if update needed -- download
        #print 'Caching file from %s' % url

        if url.startswith('svn://'):
            # export a fresh copy from the subversion repository
            pysvn.Client().export(url, cfilename)
        if url.startswith('http://'):
            # fetch into a temporary location first, decompress later
            tempfile, ignored = urllib.urlretrieve(url)
            decompressor = 'gzip'
            decompressor = 'bzip2'
                "Don't know how to decompress %s files" \
            if not decompressor is None:
                if subprocess.call([decompressor, '-d', '-q', '-f',
                    raise RuntimeError, \
                        "Something went wrong while decompressing '%s'" \

            # move decompressed file into cache
            shutil.move(os.path.splitext(tempfile)[0], cfilename)

        # XXX do we need that if explicit filename is provided?
        # hand back an open unicode file handle on the cached copy
        fh = codecs.open(cfilename, 'r', 'utf-8')
def add_pkgfromtaskfile(db, urls):
    # Seed `db` with empty entries for every package named in the given
    # blends task files (Depends/Recommends/Suggests relationships).
    cache = AptListsCache()

    # loop over all stanzas
    for stanza in deb822.Packages.iter_paragraphs(fh):
        if stanza.has_key('Depends'):
            pkg = stanza['Depends']
        elif stanza.has_key('Recommends'):
            pkg = stanza['Recommends']
        elif stanza.has_key('Suggests'):
            pkg = stanza['Suggests']

        # account for multiple packages per line
            pkgs += [p.strip() for p in pkg.split(',')]
            pkgs.append(pkg.strip())

        # register yet-unknown packages with a blank record
        if not db.has_key(p):
            db[p] = get_emptydbentry()
def get_emptydbentry():
    # Return a fresh, empty per-package record for the database.
    # NOTE(review): callers later access db[p]['main'], so this presumably
    # returns a dict containing at least a 'main' sub-dict -- TODO confirm.
def import_blendstask(cfg, db, url):
    # Merge package metadata from a single Debian Blends task file at `url`
    # into `db`, and handle configured package-name aliases.
    cache = AptListsCache()

    # figure out blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
        elif st.has_key('Recommends'):
            pkg = st['Recommends']
        elif st.has_key('Suggests'):
#            print 'Warning: Cannot determine name of prospective package ' \
#                    '... ignoring. Dump follows:'

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p

            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            if st.has_key('Pkg-Description'):
                # first line is the short description, rest is long
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # charge the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            if st.has_key('Published-Title'):
                title = st['Published-Title']
                    # strip trailing dot -- added later
                    pub = {'title': title[:-1]}
                    pub = {'title': title}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']

                db[p]['main']['publication'] = pub

            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
                # just add this tasks name and id
                db[p]['blends']['tasks'].append(task)

            # handle pkg name aliases
            if p in cfg.options('blend package aliases'):
                src_entry = db[p].copy()
                # remove original entry
                # copy the entry into all aliases
                for alias in cfg.get('blend package aliases', p).split():
                    print "Aliasing %s to %s" % (p, alias)
                    db[alias] = copy.deepcopy(src_entry)
def get_releaseinfo(rurl):
    # Fetch and parse an APT Release file; return repository metadata
    # (base URL, architectures, components, codename/label info) as a dict.
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([rp['Label'], rp['Codename']])

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
def build_pkgsurl(baseurl, component, arch):
    """Return the URL of the bzip2-compressed Packages file for the
    given repository component and architecture."""
    path_elements = [baseurl, component, 'binary-' + arch, 'Packages.bz2']
    return '/'.join(path_elements)
def import_release(cfg, db, rurl):
    # Import package stanzas from every component/architecture Packages
    # file of the NeuroDebian release described by Release file `rurl`.
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])

            packages_file.close()
def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    # Store a single Packages stanza `st` under its per-release key in
    # `db`, merging architectures for already-seen versions.
    # only care for known packages
    if not db.has_key(pkg):
        # print 'Ignoring NeuroDebian package "%s"' % pkg

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
        info = {'architecture': []}

    # record this stanza's architecture (once) and basic fields
    if not st['Architecture'] in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # NOTE(review): join of a single-element list -- presumably the
    # elided original also included `baseurl`; verify against upstream
    info['poolurl'] = '/'.join([os.path.dirname(st['Filename'])])

    # escape %-signs so descriptions survive later %-formatting
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # charge the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']
def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
def create_dir(path):
    # Create `path` recursively (akin to os.makedirs), one path
    # component at a time; no-op when it already exists.
    if os.path.exists(path):

    ps = path.split(os.path.sep)

    for i in range(1,len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
def dde_get(url, fail=False):
    # Query the DDE (Debian Data Export) JSON interface for `url` and
    # return the 'r' payload; on URL errors retries once with fail=True.
    # enforce delay to be friendly to DDE
        data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
        print "SUCCESS:", url
    except urllib2.HTTPError, e:
        # no info available for this package -- not fatal
        print "NOINFO:", url, type(e)
    except urllib2.URLError, e:
        print "URLERROR:", url, type(e)
            print "Permanant failure"
            print "Try again after 30 seconds..."
        return dde_get(url, fail=True)
    except (StopIteration):
    except json.ReadException, e:
        print "UDD-DOWN?:", url, type(e)
def nitrc_get(spec, fail=False):
    # Fetch project JSON for NITRC id `spec`; retries once on URL errors.
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    # change into this from python 2.6 on
    #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
    data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
    print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError, e:
        print "NITRC-NOINFO:", spec, type(e)
    except urllib2.URLError, e:
        print "NITRC-URLERROR:", spec, type(e)
            print "Permanant failure"
            print "Try again after 30 seconds..."
        return nitrc_get(spec, fail=True)
def parse_nitrc(data):
    # Reduce raw NITRC project JSON to summary statistics (total
    # download count over all packages/releases/files).
    # simplify -- there is only one project in the data
    project = data['projects'][0]
    nitrc_filtered = {'downloads': 0,
    for pkg in project['packages']:
        for release in pkg['releases']:
            for file in release['files']:
                nitrc_filtered['downloads'] += file['download_count']
    return nitrc_filtered
def import_nitrc(cfg, db):
    # Attach NITRC download statistics to every package that has an id
    # configured in the "nitrc ids" config section.
    if not cfg.has_option("nitrc ids", p):

    nitrc_spec = cfg.get("nitrc ids", p)
    nitrc_data = nitrc_get(nitrc_spec)
    nitrc_excerpt = parse_nitrc(nitrc_data)
    if not nitrc_excerpt is None:
        db[p]['nitrc'] = nitrc_excerpt
def import_dde(cfg, db):
    # Enrich `db` with DDE information: popcon for Debian and Ubuntu,
    # plus per-release package data from UDD.
    query_url = cfg.get('dde', 'pkgquery_url')
        q = dde_get(query_url + "/packages/all/%s" % p)
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
            # get latest popcon info for debian and ubuntu
            # cannot use origin field itself, since it is none for few packages
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                print("Ignoring unkown origin '%s' for package '%s'." \

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)

        # hold all info about this package per distribution release
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data for multiple over archs
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions

        # finally assign the new package data
        for k, v in info.iteritems():
def assure_unicode(s):
    """Assure that argument is unicode

    Necessary if strings are not carrying out Pythonish 'u' prefix to
    signal UTF8 strings, but are in fact UTF8
    if type(s) is unicode:
    # attempt regular unicode call and if fails -- just decode it
    except UnicodeDecodeError, e:
        return s.decode('utf8')
        # non-string inputs: stringify first, then recurse
        return assure_unicode(str(s))
def convert_longdescr(ld):
    # Convert a Debian-style long description into ReST-friendly text:
    # paragraph separators become blank lines, embedded list items are
    # kept, and ReST-active characters are escaped.
    ld = ld.replace('% ', '%% ')
    for i, l in enumerate(ld):
        ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >=3 and l[:2] == ' ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
    # safe-guard ReST active symbols
    ld = re.sub(r'([\'`*])', r'\\\1', ld)
def underline_text(text, symbol):
    """Render `text` as a ReST-style heading: the text followed by an
    underline of `symbol` characters of matching length, newline-terminated."""
    return '\n'.join((text, symbol * len(text), ''))
def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
    # Render the ReST page for a single package from the jinja template;
    # appends a per-package addendum include when one exists.
    # local binding for ease of use
    # do nothing if there is not at least the very basic stuff
    if not pkgdb['main'].has_key('description'):
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    title = underline_text(title, '*')

    if 'sv' in pkgdb['main']:
        ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
        if not os.path.exists(ex_dir):
    page = template.render(
            long_description=convert_longdescr(
                assure_unicode(pkgdb['main']['long_description'])),
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
def store_db(db, filename):
    # Dump the package database to `filename` as pretty-printed UTF-8
    # Python literal text (counterpart of read_db()).
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
def read_db(filename):
    # Load a database previously written by store_db().
    # NOTE(review): presumably eval()s the pretty-printed dump -- only
    # ever feed it trusted, locally generated files; confirm in full source.
    f = codecs.open(filename, 'r', 'utf-8')
def write_sourceslist(jinja_env, cfg, outdir):
    # Generate apt sources.list snippets for every release/mirror combo
    # plus the rendered sources_lists overview page.
    create_dir(os.path.join(outdir, 'lists'))

    for release in cfg.options('release codenames'):
        if release == 'data':
            # no separate list for the data archive
        transrel = trans_codename(release, cfg)
        for mirror in cfg.options('mirrors'):
            listname = '%s.%s' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, 'lists', listname), 'w')
            # each list carries the data archive plus the release itself
            for rel in ('data', release):
                aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                lf.write('deb %s' % aptcfg)
                lf.write('#deb-src %s' % aptcfg)

    # lookup tables handed to the template
    id2codename = dict([(cfg.get('release backport ids', r), r)
                        for r in cfg.options('release codenames')])
    id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
                       for r in cfg.options('release codenames')])
    mirror2name = dict([(m, cfg.get('mirror names', m))
                        for m in cfg.options('mirrors')])
    mirror2url = dict([(m, cfg.get('mirrors', m))
                       for m in cfg.options('mirrors')])
    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(id2codename=id2codename,
                                     id2relname=id2relname,
                                     mirror2name=mirror2name,
                                     mirror2url=mirror2url))
def sort_by_tasks(db):
    # Group package names by the blends task(s) they belong to;
    # packages without blends/task info are skipped.
    for pkg in db.keys():
        if not 'blends' in db[pkg]:

        blendinfo = db[pkg]['blends']
        if not 'tasks' in blendinfo:
            # no task info in blend data
        taskinfo = blendinfo['tasks']
        for task in taskinfo:
            if not taskname in tasks:
            tasks[taskname].append(pkg)
def sort_by_maintainer(db):
    # Group package names by maintainer; blends 'responsible' info takes
    # precedence, falling back to (original_)maintainer from 'main'.
    for pkg in db.keys():

        # start with the blends info
        if 'blends' in pkginfo and 'responsible' in pkginfo['blends']:
            maint = pkginfo['blends']['responsible']
        if not 'main' in db[pkg] and maint is None:

        info = db[pkg]['main']
        if not 'maintainer' in info and maint is None:
        if 'original_maintainer' in info and not info['original_maintainer'] is None:
            maint = info['original_maintainer']
        elif 'maintainer' in info and not info['maintainer'] is None:
            maint = info['maintainer']

        # no sane maintainer info
        # safeguard: <> confuses sphinx and we don't care about different emails
        maint = maint[:maint.find('<')].strip()
        # kick out non-ascii ones (should not be, but too tired to find the bug)
            codecs.ascii_decode(maint)
        except UnicodeEncodeError:
        # bucket keys are lower-cased to merge case variants of one name
        if not maint.lower() in maints:
            maints[maint.lower()] = []
            maint_ids[maint.lower()] = [maint]
            maint_ids[maint.lower()].append(maint)
        maints[maint.lower()].append(pkg)

    out[maint_ids[m][0]] = np.unique(maints[m])
def sort_by_release(db):
    # Group package names by distribution release; release keys in a
    # package record are tuples, everything else is ignored.
    for pkg in db.keys():
        if not isinstance(sec, tuple):
            # only releases are of interest
        if not relname in rels:
        rels[relname].append(pkg)
        # de-duplicate package lists per release
        rels[r] = np.unique(rels[r])
def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
    # Emit all ReST output: categorized package TOCs, the full package
    # list, and one page per package.
    create_dir(os.path.join(outdir, 'pkgs'))
    create_dir(os.path.join(outdir, 'pkglists'))
    # template for individual package listings
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    # the high-level package list overview
    hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    hltoc.write('.. _pkglists:\n\n')
    hltoc.write(underline_text('Software packages', '='))
    defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
            (sort_by_release(db), 'By release', 'Packages for %s'),
            (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
        # TOC for each thingie
        pkgsdict, sectitle, title_tmpl = def_
        hltoc.write(underline_text(sectitle, '-'))
        ids = pkgsdict.keys()
            # sphinx label: lower-case, spaces and slashes normalized
            label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
            filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
            if not len(filtered_pkgs):
            plist = toc_template.render(
                    title=underline_text(title_tmpl % id_, '='),
            toc = codecs.open(os.path.join(outdir,
            hltoc.write('* :ref:`%s`\n' % label)

    # now a complete list of all packages
    hltoc.write(underline_text('Complete list', '-'))
    toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
    toc.write(toc_template.render(label='full_pkg_list',
                                  title=underline_text('Complete package list', '='),
                                  pkgs=db.keys(), db=db))
    hltoc.write('* :ref:`full_pkg_list`\n')

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
        # when no page is available skip this package
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
def prepOptParser(op):
    # Register all command line options on the given OptionParser.
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None, help="None")

    op.add_option("--extracts", action="store", dest="extracts_dir",
                  type="string", default=None, help="None")
# Driver: parse options, run the requested command ('updatedb' rebuilds
# the package database; otherwise the stored database is rendered).
op = OptionParser(version="%prog 0.0.2")
(opts, args) = op.parse_args()
    print('There needs to be exactly one command')
    print("'--cfg' option is mandatory.")
    print("'--db' option is mandatory.")

cfg = SafeConfigParser()

# load existing db, unless renew is requested
if cmd == 'updatedb':
    if cfg.has_option('packages', 'select taskfiles'):
        db = add_pkgfromtaskfile(db, cfg.get('packages',
                                             'select taskfiles').split())

    # add additional package names from config file
    if cfg.has_option('packages', 'select names'):
        for p in cfg.get('packages', 'select names').split():
            if not db.has_key(p):
                db[p] = get_emptydbentry()

    # get info from task files
    if cfg.has_option('packages', 'prospective'):
        for url in cfg.get('packages', 'prospective').split():
            db = import_blendstask(cfg, db, url)

    # parse NeuroDebian repository
    if cfg.has_option('neurodebian', 'releases'):
        for rurl in cfg.get('neurodebian', 'releases').split():
            db = import_release(cfg, db, rurl)

    # collect package information from DDE
    db = import_dde(cfg, db)
    # get info from NITRC
    db = import_nitrc(cfg, db)

    store_db(db, opts.db)

# load the db from file
db = read_db(opts.db)

jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

# generate package pages and TOC and write them to files
write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)

write_sourceslist(jinja_env, cfg, opts.outdir)
925 if __name__ == "__main__":