2 """Tell me who you are!
7 from debian_bundle import deb822
11 # Lets first assure no guarding (but annoying) warnings
13 warnings.simplefilter('ignore', FutureWarning)
14 warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
17 from ConfigParser import SafeConfigParser
18 from optparse import OptionParser, Option, OptionGroup, OptionConflictError
30 from jinja2 import Environment, PackageLoader
32 from pprint import PrettyPrinter
35 class AptListsCache(object):
36 def __init__(self, cachedir='build/cache',
39 self.cachedir = cachedir
41 if not ro_cachedirs is None:
42 self.ro_cachedirs = ro_cachedirs
44 self.ro_cachedirs = []
47 create_dir(self.cachedir)
49 def get(self, url, update=False):
50 """Looks in the cache if the file is there and takes the cached one.
51 Otherwise it is downloaded first.
53 Knows how to deal with http:// and svn:// URLs.
58 # look whether it is compressed
59 cext = url.split('.')[-1]
60 if cext in ['gz', 'bz2']:
61 target_url = url[:-1 * len(cext) -1]
63 # assume not compressed
67 # turn url into a filename -- mimik what APT does for
69 tfilename = '_'.join(target_url.split('/')[2:])
71 # if we need to download anyway do not search
73 cfilename = os.path.join(self.cachedir, tfilename)
75 # look for the uncompressed file anywhere in the cache
77 for cp in [self.cachedir] + self.ro_cachedirs:
78 if os.path.exists(os.path.join(cp, tfilename)):
79 cfilename = os.path.join(cp, tfilename)
84 cfilename = os.path.join(self.cachedir, tfilename)
87 # if updated needed -- download
89 #print 'Caching file from %s' % url
91 if url.startswith('svn://'):
93 pysvn.Client().export(url, cfilename)
94 if url.startswith('http://'):
96 tempfile, ignored = urllib.urlretrieve(url)
101 decompressor = 'gzip'
103 decompressor = 'bzip2'
108 "Don't know how to decompress %s files" \
111 if not decompressor is None:
112 if subprocess.call([decompressor, '-d', '-q', '-f',
114 raise RuntimeError, \
115 "Something went wrong while decompressing '%s'" \
118 # move decompressed file into cache
119 shutil.move(os.path.splitext(tempfile)[0], cfilename)
121 # XXX do we need that if explicit filename is provided?
125 fh = codecs.open(cfilename, 'r', 'utf-8')
130 def add_pkgfromtaskfile(db, urls):
131 cache = AptListsCache()
137 # loop over all stanzas
138 for stanza in deb822.Packages.iter_paragraphs(fh):
139 if stanza.has_key('Depends'):
140 pkg = stanza['Depends']
141 elif stanza.has_key('Recommends'):
142 pkg = stanza['Recommends']
143 elif stanza.has_key('Suggests'):
144 pkg = stanza['Suggests']
148 # account for multiple packages per line
150 pkgs += [p.strip() for p in pkg.split(',')]
152 pkgs.append(pkg.strip())
155 if not db.has_key(p):
156 db[p] = get_emptydbentry()
160 def get_emptydbentry():
163 def import_blendstask(cfg, db, url):
164 cache = AptListsCache()
168 # figure out blend's task page URL, since they differ from blend to blend
169 urlsec = url.split('/')
170 blendname = urlsec[-3]
171 if blendname == 'debian-med':
172 taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
173 elif blendname == 'debian-science':
174 taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
176 raise ValueError('Unknown blend "%s"' % blendname)
177 taskpage_url += urlsec[-1]
179 for st in deb822.Packages.iter_paragraphs(fh):
180 if st.has_key('Task'):
181 task_name = st['Task']
182 task = (blendname, task_name, taskpage_url)
184 if st.has_key('Depends'):
186 elif st.has_key('Recommends'):
187 pkg = st['Recommends']
188 elif st.has_key('Suggests'):
191 # print 'Warning: Cannot determine name of prospective package ' \
192 # '... ignoring. Dump follows:'
196 # take care of pkg lists
197 for p in pkg.split(', '):
198 if not db.has_key(p):
199 print 'Ignoring blend package "%s"' % p
205 info['tasks'] = [task]
206 if st.has_key('License'):
207 info['license'] = st['License']
208 if st.has_key('Responsible'):
209 info['responsible'] = st['Responsible']
212 if st.has_key('Pkg-Description'):
213 descr = st['Pkg-Description'].split('\n')
214 info['description'] = descr[0].strip()
215 info['long_description'] = \
216 u'\n'.join(descr[1:])
218 # charge the basic property set
219 db[p]['main']['description'] = info['description']
220 db[p]['main']['long_description'] = info['long_description']
221 if st.has_key('WNPP'):
222 db[p]['main']['debian_itp'] = st['WNPP']
223 if st.has_key('Pkg-URL'):
224 db[p]['main']['other_pkg'] = st['Pkg-URL']
225 if st.has_key('Homepage'):
226 db[p]['main']['homepage'] = st['Homepage']
229 if st.has_key('Published-Title'):
230 title = st['Published-Title']
232 # trip trailing dot -- added later
233 pub = {'title': title[:-1]}
235 pub = {'title': title}
236 if st.has_key('Published-Authors'):
237 pub['authors'] = st['Published-Authors']
238 if st.has_key('Published-Year'):
239 pub['year'] = st['Published-Year']
240 if st.has_key('Published-In'):
241 pub['in'] = st['Published-In']
242 if st.has_key('Published-URL'):
243 pub['url'] = st['Published-URL']
244 if st.has_key('Published-DOI'):
245 pub['doi'] = st['Published-DOI']
246 # need at least one URL
247 if not pub.has_key('url'):
248 pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']
250 db[p]['main']['publication'] = pub
253 if st.has_key('Registration'):
254 db[p]['main']['registration'] = st['Registration']
257 if st.has_key('Remark'):
258 # prepend a single space to make it look like a long description
259 info['remark'] = convert_longdescr(' ' + st['Remark'])
261 # only store if there isn't something already
262 if not db[p].has_key('blends'):
263 db[p]['blends'] = info
265 # just add this tasks name and id
266 db[p]['blends']['tasks'].append(task)
268 # handle pkg name aliases
269 if p in cfg.options('blend package aliases'):
270 src_entry = db[p].copy()
271 # remove original entry
273 # copy the entry into all aliases
274 for alias in cfg.get('blend package aliases', p).split():
275 print "Aliasing %s to %s" % (p, alias)
276 db[alias] = copy.deepcopy(src_entry)
281 def get_releaseinfo(rurl):
282 cache = AptListsCache()
283 # root URL of the repository
284 baseurl = '/'.join(rurl.split('/')[:-1])
285 # get the release file from the cache
286 release_file = cache.get(rurl)
288 # create parser instance
289 rp = deb822.Release(release_file)
291 # architectures on this dist
292 archs = rp['Architectures'].split()
293 components = rp['Components'].split()
294 # compile a new codename that also considers the repository label
295 # to distinguish between official and unofficial repos.
297 origin = rp['Origin']
298 codename = rp['Codename']
299 labelcode = '_'.join([rp['Label'], rp['Codename']])
304 return {'baseurl': baseurl, 'archs': archs, 'components': components,
305 'codename': codename, 'label': label, 'labelcode': labelcode,
def build_pkgsurl(baseurl, component, arch):
    """Assemble the URL of an APT binary Packages.bz2 index.

    Joins the repository base URL with the given component and the
    architecture-specific 'binary-<arch>' subdirectory.
    """
    path_parts = [baseurl, component, 'binary-' + arch, 'Packages.bz2']
    return '/'.join(path_parts)
313 def import_release(cfg, db, rurl):
314 cache = AptListsCache()
316 ri = get_releaseinfo(rurl)
318 # compile the list of Packages files to parse and parse them
319 for c in ri['components']:
320 for a in ri['archs']:
321 # compile packages URL
322 pkgsurl = build_pkgsurl(ri['baseurl'], c, a)
324 # retrieve from cache
325 packages_file = cache.get(pkgsurl)
328 for stanza in deb822.Packages.iter_paragraphs(packages_file):
329 db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])
332 packages_file.close()
336 def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
343 # only care for known packages
344 if not db.has_key(pkg):
345 # print 'Ignoring NeuroDebian package "%s"' % pkg
348 distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)
350 if db[pkg].has_key(distkey):
351 info = db[pkg][distkey]
353 info = {'architecture': []}
356 if not st['Architecture'] in info['architecture']:
357 info['architecture'].append(st['Architecture'])
358 info['maintainer'] = st['Maintainer']
359 if st.has_key('Homepage'):
360 info['homepage'] = st['Homepage']
361 info['version'] = st['Version']
364 info['distribution'] = origin
365 info['release'] = codename
366 info['component'] = component
369 info['poolurl'] = '/'.join([os.path.dirname(st['Filename'])])
372 descr = st['Description'].replace('%', '%%').split('\n')
373 info['description'] = descr[0].strip()
374 info['long_description'] = u'\n'.join(descr[1:])
376 db[pkg][distkey] = info
378 # charge the basic property set
379 db[pkg]['main']['description'] = info['description']
380 db[pkg]['main']['long_description'] = info['long_description']
381 if st.has_key('Source'):
382 db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
384 db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
385 if st.has_key('Homepage'):
386 db[pkg]['main']['homepage'] = st['Homepage']
387 if st.has_key('Recommends'):
388 db[pkg]['main']['recommends'] = st['Recommends']
393 def trans_codename(codename, cfg):
394 """Translate a known codename into a release description.
396 Unknown codenames will simply be returned as is.
398 # if we know something, tell
399 if codename in cfg.options('release codenames'):
400 return cfg.get('release codenames', codename)
405 def create_dir(path):
406 if os.path.exists(path):
409 ps = path.split(os.path.sep)
411 for i in range(1,len(ps) + 1):
412 p = os.path.sep.join(ps[:i])
414 if not os.path.exists(p):
418 def dde_get(url, fail=False):
419 # enforce delay to be friendly to DDE
422 data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
423 print "SUCCESS:", url
425 except urllib2.HTTPError, e:
426 print "NOINFO:", url, type(e)
428 except urllib2.URLError, e:
429 print "URLERROR:", url, type(e)
431 print "Permanant failure"
433 print "Try again after 30 seconds..."
435 return dde_get(url, fail=True)
436 except (StopIteration):
439 except json.ReadException, e:
440 print "UDD-DOWN?:", url, type(e)
444 def nitrc_get(spec, fail=False):
445 nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
447 # change into this from python 2.6 on
448 #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
449 data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
450 print "NITRC-SUCCESS:", spec
451 except urllib2.HTTPError, e:
452 print "NITRC-NOINFO:", spec, type(e)
454 except urllib2.URLError, e:
455 print "NITRC-URLERROR:", spec, type(e)
457 print "Permanant failure"
459 print "Try again after 30 seconds..."
461 return nitrc_get(spec, fail=True)
465 def parse_nitrc(data):
468 # simplify -- there is only one project in the data
469 project = data['projects'][0]
470 nitrc_filtered = {'downloads': 0,
472 for pkg in project['packages']:
473 for release in pkg['releases']:
474 for file in release['files']:
475 nitrc_filtered['downloads'] += file['download_count']
476 return nitrc_filtered
479 def import_nitrc(cfg, db):
481 if not cfg.has_option("nitrc ids", p):
483 nitrc_spec = cfg.get("nitrc ids", p)
484 nitrc_data = nitrc_get(nitrc_spec)
485 nitrc_excerpt = parse_nitrc(nitrc_data)
486 if not nitrc_excerpt is None:
487 db[p]['nitrc'] = nitrc_excerpt
491 def import_dde(cfg, db):
492 query_url = cfg.get('dde', 'pkgquery_url')
495 q = dde_get(query_url + "/packages/all/%s" % p)
497 # copy all stuff, while preserving non-overlapping information
498 for k, v in q.iteritems():
500 # get latest popcon info for debian and ubuntu
501 # cannot use origin field itself, since it is none for few packages
503 origin = q['drc'].split()[0]
504 if origin == 'ubuntu':
505 if q.has_key('popcon'):
506 db[p]['main']['ubuntu_popcon'] = q['popcon']
507 # if we have ubuntu, need to get debian
508 q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
509 if q and q.has_key('popcon'):
510 db[p]['main']['debian_popcon'] = q['popcon']
511 elif origin == 'debian':
512 if q.has_key('popcon'):
513 db[p]['main']['debian_popcon'] = q['popcon']
514 # if we have debian, need to get ubuntu
515 q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
516 if q and q.has_key('popcon'):
517 db[p]['main']['ubuntu_popcon'] = q['popcon']
519 print("Ignoring unkown origin '%s' for package '%s'." \
522 # now get info for package from all releases in UDD
523 q = dde_get(query_url + "/dist/p:%s" % p)
526 # hold all info about this package per distribution release
529 distkey = (trans_codename(cp['release'], cfg),
530 "%s-%s" % (cp['distribution'], cp['release']))
531 if not info.has_key(distkey):
533 # turn into a list to append others later
534 info[distkey]['architecture'] = [info[distkey]['architecture']]
535 # accumulate data for multiple over archs
537 comp = apt.VersionCompare(cp['version'],
538 info[distkey]['version'])
539 # found another arch for the same version
541 info[distkey]['architecture'].append(cp['architecture'])
542 # found newer version, dump the old ones
545 # turn into a list to append others later
546 info[distkey]['architecture'] = [info[distkey]['architecture']]
547 # simply ignore older versions
551 # finally assign the new package data
552 for k, v in info.iteritems():
557 def assure_unicode(s):
558 """Assure that argument is unicode
560 Necessary if strings are not carrying out Pythonish 'u' prefix to
561 signal UTF8 strings, but are in fact UTF8
563 if type(s) is unicode:
566 # attempt regular unicode call and if fails -- just decode it
570 except UnicodeDecodeError, e:
571 return s.decode('utf8')
573 return assure_unicode(str(s))
576 def convert_longdescr(ld):
578 ld = ld.replace('% ', '%% ')
581 for i, l in enumerate(ld):
584 ld[i] = ' #NEWLINEMARKER# '
585 # look for embedded lists
586 elif len(l) >=3 and l[:2] == ' ':
589 ld[i] = ' #NEWLINEMARKER# ' + l[2:]
591 ld[i] = ' \n::\n\n' + l
598 descr = descr.replace('#NEWLINEMARKER# ', '\n\n')
599 # cleanup any leftover (e.g. trailing markers)
600 descr = descr.replace('#NEWLINEMARKER#', '')
601 # safe-guard ReST active symbols
602 descr = re.sub(r'([\'`*])', r'\\\1', descr)
def underline_text(text, symbol):
    """Return *text* underlined ReST-style with a row of *symbol*.

    The underline row has exactly the same length as the text, and the
    result carries a trailing newline.
    """
    return '%s\n%s\n' % (text, symbol * len(text))
611 def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
612 # local binding for ease of use
614 # do nothing if there is not at least the very basic stuff
615 if not pkgdb['main'].has_key('description'):
617 title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
618 title = underline_text(title, '*')
621 if 'sv' in pkgdb['main']:
622 ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
623 if not os.path.exists(ex_dir):
625 page = template.render(
628 long_description=convert_longdescr(
629 assure_unicode(pkgdb['main']['long_description'])),
635 # the following can be replaced by something like
636 # {% include "sidebar.html" ignore missing %}
637 # in the template whenever jinja 2.2 becomes available
638 addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
639 if os.path.exists(addenum):
640 page += '\n\n.. include:: %s\n' % addenum
644 def store_db(db, filename):
645 pp = PrettyPrinter(indent=2)
646 f = codecs.open(filename, 'w', 'utf-8')
647 f.write(pp.pformat(db))
651 def read_db(filename):
652 f = codecs.open(filename, 'r', 'utf-8')
656 def write_sourceslist(jinja_env, cfg, outdir):
658 create_dir(os.path.join(outdir, 'lists'))
661 for release in cfg.options('release codenames'):
662 if release == 'data':
663 # no seperate list for the data archive
665 transrel = trans_codename(release, cfg)
667 for mirror in cfg.options('mirrors'):
668 listname = '%s.%s' % (release, mirror)
669 repos[transrel].append((mirror, listname))
670 lf = open(os.path.join(outdir, 'lists', listname), 'w')
671 for rel in ('data', release):
672 aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
674 lf.write('deb %s' % aptcfg)
675 lf.write('#deb-src %s' % aptcfg)
678 id2codename = dict([(cfg.get('release backport ids', r), r)
679 for r in cfg.options('release codenames')])
680 id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
681 for r in cfg.options('release codenames')])
682 mirror2name = dict([(m, cfg.get('mirror names', m))
683 for m in cfg.options('mirrors')])
684 mirror2url = dict([(m, cfg.get('mirrors', m))
685 for m in cfg.options('mirrors')])
686 srclist_template = jinja_env.get_template('sources_lists.rst')
687 sl = open(os.path.join(outdir, 'sources_lists'), 'w')
688 sl.write(srclist_template.render(id2codename=id2codename,
689 id2relname=id2relname,
690 mirror2name=mirror2name,
691 mirror2url=mirror2url))
695 def sort_by_tasks(db):
697 for pkg in db.keys():
698 if not 'blends' in db[pkg]:
701 blendinfo = db[pkg]['blends']
702 if not 'tasks' in blendinfo:
703 # no task info in blend data
705 taskinfo = blendinfo['tasks']
706 for task in taskinfo:
708 if not taskname in tasks:
711 tasks[taskname].append(pkg)
715 def sort_by_maintainer(db):
718 for pkg in db.keys():
721 # start with the blends info
722 if 'blends' in pkginfo and 'responsible' in pkginfo['blends']:
723 maint = pkginfo['blends']['responsible']
724 if not 'main' in db[pkg] and maint is None:
727 info = db[pkg]['main']
728 if not 'maintainer' in info and maint is None:
731 if 'original_maintainer' in info and not info['original_maintainer'] is None:
732 maint = info['original_maintainer']
733 elif 'maintainer' in info and not info['maintainer'] is None:
734 maint = info['maintainer']
736 # no sane maintainer info
738 # safeguard: <> confuses sphinx and we don't care about different emails
739 maint = maint[:maint.find('<')].strip()
740 # kick out non-ascii ones (should not be, but too tired to find the bug)
742 codecs.ascii_decode(maint)
743 except UnicodeEncodeError:
745 if not maint.lower() in maints:
746 maints[maint.lower()] = []
747 maint_ids[maint.lower()] = [maint]
749 maint_ids[maint.lower()].append(maint)
750 maints[maint.lower()].append(pkg)
754 out[maint_ids[m][0]] = np.unique(maints[m])
758 def sort_by_release(db):
760 for pkg in db.keys():
763 if not isinstance(sec, tuple):
764 # only releases are of interest
767 if not relname in rels:
770 rels[relname].append(pkg)
773 rels[r] = np.unique(rels[r])
777 def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
779 create_dir(os.path.join(outdir, 'pkgs'))
780 create_dir(os.path.join(outdir, 'pkglists'))
781 # template for individual package listings
782 toc_template = jinja_env.get_template('pkgs_toc.rst')
783 # the high-level package list overview
784 hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
785 hltoc.write('.. _pkglists:\n\n')
786 hltoc.write(underline_text('Software packages', '='))
787 defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
788 (sort_by_release(db), 'By release', 'Packages for %s'),
789 (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
791 # TOC for each thingie
792 pkgsdict, sectitle, title_tmpl = def_
793 hltoc.write(underline_text(sectitle, '-'))
794 ids = pkgsdict.keys()
797 label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
799 filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
800 if not len(filtered_pkgs):
802 plist = toc_template.render(
804 title=underline_text(title_tmpl % id_, '='),
807 toc = codecs.open(os.path.join(outdir,
813 hltoc.write('* :ref:`%s`\n' % label)
817 # now a complete list of all packages
818 hltoc.write(underline_text('Complete list', '-'))
819 toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
821 toc.write(toc_template.render(label='full_pkg_list',
822 title=underline_text('Complete package list', '='),
823 pkgs=db.keys(), db=db))
825 hltoc.write('* :ref:`full_pkg_list`\n')
828 # and now each individual package page
829 pkg_template = jinja_env.get_template('pkg.rst')
831 page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
832 # when no page is available skip this package
835 pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
840 def prepOptParser(op):
841 # use module docstring for help output
842 op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__
844 op.add_option("--db",
845 action="store", type="string", dest="db",
847 help="Database file to read. Default: None")
849 op.add_option("--cfg",
850 action="store", type="string", dest="cfg",
852 help="Repository config file.")
854 op.add_option("-o", "--outdir",
855 action="store", type="string", dest="outdir",
857 help="Target directory for ReST output. Default: None")
859 op.add_option("-r", "--release-url",
860 action="append", dest="release_urls",
863 op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
864 type="string", default=None, help="None")
866 op.add_option("--extracts", action="store", dest="extracts_dir",
867 type="string", default=None, help="None")
871 op = OptionParser(version="%prog 0.0.2")
874 (opts, args) = op.parse_args()
877 print('There needs to be exactly one command')
883 print("'--cfg' option is mandatory.")
886 print("'--db' option is mandatory.")
890 cfg = SafeConfigParser()
893 # load existing db, unless renew is requested
894 if cmd == 'updatedb':
896 if cfg.has_option('packages', 'select taskfiles'):
897 db = add_pkgfromtaskfile(db, cfg.get('packages',
898 'select taskfiles').split())
900 # add additional package names from config file
901 if cfg.has_option('packages', 'select names'):
902 for p in cfg.get('packages', 'select names').split():
903 if not db.has_key(p):
904 db[p] = get_emptydbentry()
906 # get info from task files
907 if cfg.has_option('packages', 'prospective'):
908 for url in cfg.get('packages', 'prospective').split():
909 db = import_blendstask(cfg, db, url)
911 # parse NeuroDebian repository
912 if cfg.has_option('neurodebian', 'releases'):
913 for rurl in cfg.get('neurodebian', 'releases').split():
914 db = import_release(cfg, db, rurl)
916 # collect package information from DDE
917 db = import_dde(cfg, db)
918 # get info from NITRC
919 db = import_nitrc(cfg, db)
921 store_db(db, opts.db)
925 # load the db from file
926 db = read_db(opts.db)
929 jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))
931 # generate package pages and TOC and write them to files
932 write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)
934 write_sourceslist(jinja_env, cfg, opts.outdir)
936 if __name__ == "__main__":