2 """Tell me who you are!
7 from debian_bundle import deb822
10 # Lets first assure no guarding (but annoying) warnings
12 warnings.simplefilter('ignore', FutureWarning)
13 warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
16 from ConfigParser import SafeConfigParser
17 from optparse import OptionParser, Option, OptionGroup, OptionConflictError
29 from jinja2 import Environment, PackageLoader
31 from pprint import PrettyPrinter
class AptListsCache(object):
    """File cache for APT list files and other downloads.

    Files live in a writable cache dir; additional read-only cache dirs
    are consulted on lookup.  NOTE(review): several lines of this class
    were not visible during review; comments describe only shown code.
    """
    def __init__(self, cachedir='build/cache',
        # writable cache location (created below if missing)
        self.cachedir = cachedir
        # optional extra read-only cache dirs searched by get()
        if not ro_cachedirs is None:
            self.ro_cachedirs = ro_cachedirs
            self.ro_cachedirs = []

        # make sure the writable cache exists
        create_dir(self.cachedir)

    def get(self, url, update=False):
        """Looks in the cache if the file is there and takes the cached one.
        Otherwise it is downloaded first.

        Knows how to deal with http:// and svn:// URLs.

        :Parameters:
          url
            Location to retrieve the file from when not cached.
          update: bool
            If True, (re)download even when a cached copy exists.
        """
        # look whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            # strip the compression extension to get the real target URL
            target_url = url[:-1 * len(cext) -1]
        # assume not compressed

        # turn url into a filename -- mimik what APT does for
        # /var/lib/apt/lists/: join host and path parts with underscores
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        cfilename = os.path.join(self.cachedir, tfilename)

        # look for the uncompressed file anywhere in the cache
        for cp in [self.cachedir] + self.ro_cachedirs:
            if os.path.exists(os.path.join(cp, tfilename)):
                cfilename = os.path.join(cp, tfilename)

        # NOTE(review): guarded branch lines missing around here -- the
        # writable-cache path is (re)assigned as the download target
        cfilename = os.path.join(self.cachedir, tfilename)

        # if updated needed -- download
        #print 'Caching file from %s' % url

        if url.startswith('svn://'):
            # export a plain (unversioned) copy straight into the cache
            pysvn.Client().export(url, cfilename)
        if url.startswith('http://'):
            # download into a temporary location first
            tempfile, ignored = urllib.urlretrieve(url)

            # pick decompressor matching the extension detected above
            # NOTE(review): the if/elif lines selecting between these are
            # not visible -- confirm
            decompressor = 'gzip'
            decompressor = 'bzip2'
            "Don't know how to decompress %s files" \
            if not decompressor is None:
                # decompress in place via the external tool
                if subprocess.call([decompressor, '-d', '-q', '-f',
                    raise RuntimeError, \
                          "Something went wrong while decompressing '%s'" \

            # move decompressed file into cache
            shutil.move(os.path.splitext(tempfile)[0], cfilename)

        # XXX do we need that if explicit filename is provided?
        fh = codecs.open(cfilename, 'r', 'utf-8')
def add_pkgfromtaskfile(db, urls):
    """Seed *db* with empty entries for all packages named in the given
    Blends task files.

    NOTE(review): the loop over *urls* (producing ``fh``), the ``pkgs``
    initialization, and the final return are on lines not visible here.
    """
    cache = AptListsCache()

    # loop over all stanzas
    for stanza in deb822.Packages.iter_paragraphs(fh):
        # task files name prospective packages in dependency fields
        if stanza.has_key('Depends'):
            pkg = stanza['Depends']
        elif stanza.has_key('Recommends'):
            pkg = stanza['Recommends']
        elif stanza.has_key('Suggests'):
            pkg = stanza['Suggests']

        # account for multiple packages per line
        pkgs += [p.strip() for p in pkg.split(',')]
        pkgs.append(pkg.strip())

    # register every collected name with a fresh skeleton entry
    if not db.has_key(p):
        db[p] = get_emptydbentry()
159 def get_emptydbentry():
def import_blendstask(cfg, db, url):
    """Import package info from a Debian Blends task file at *url* into
    *db*, filing blend metadata under each package's 'blends' key.

    NOTE(review): many lines of this function were not visible during
    review; comments below describe only the code shown.
    """
    cache = AptListsCache()

    # figure out blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            # triple identifying this task: (blend, task, task page URL)
            task = (blendname, task_name, taskpage_url)

        # package names may appear in any of the dependency fields
        if st.has_key('Depends'):
        elif st.has_key('Recommends'):
            pkg = st['Recommends']
        elif st.has_key('Suggests'):
            # print 'Warning: Cannot determine name of prospective package ' \
            #       '... ignoring. Dump follows:'

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p

            # NOTE(review): initialization of ``info`` not visible here
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # description from the task file (first line short, rest long)
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # charge the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # publication metadata, assembled into a single dict
            if st.has_key('Published-Title'):
                title = st['Published-Title']
                    # trip trailing dot -- added later
                    pub = {'title': title[:-1]}
                    pub = {'title': title}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']

                db[p]['main']['publication'] = pub

            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
                # just add this tasks name and id
                db[p]['blends']['tasks'].append(task)

            # handle pkg name aliases
            if p in cfg.options('blend package aliases'):
                src_entry = db[p].copy()
                # remove original entry
                # copy the entry into all aliases
                for alias in cfg.get('blend package aliases', p).split():
                    print "Aliasing %s to %s" % (p, alias)
                    db[alias] = copy.deepcopy(src_entry)
def get_releaseinfo(rurl):
    """Parse the APT Release file at *rurl* and return a dict describing
    the repository (base URL, archs, components, codename/label info).

    NOTE(review): the assignment of ``label`` (and possibly more keys of
    the returned dict) is on lines not visible here.
    """
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([rp['Label'], rp['Codename']])

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
def build_pkgsurl(baseurl, component, arch):
    """Return the URL of the bzip2-compressed ``Packages`` file for the
    given repository base URL, component and architecture."""
    pieces = [baseurl, component, 'binary-' + arch, 'Packages.bz2']
    return '/'.join(pieces)
def import_release(cfg, db, rurl):
    """Import package stanzas from one repository (identified by its
    Release file URL *rurl*) into *db*.

    NOTE(review): the trailing return of ``db`` is on lines not visible.
    """
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # merge every stanza of this Packages file into the db
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])

            packages_file.close()
def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    """Merge one Packages stanza *st* into *db*, keyed per release.

    :Parameters:
      st
        Package section (deb822 stanza).
    NOTE(review): the binding of ``pkg`` (package name) and the return
    statement are on lines not visible here.
    """
    # only care for known packages
    if not db.has_key(pkg):
        # print 'Ignoring NeuroDebian package "%s"' % pkg

    # per-release key: (human-readable release name, internal id)
    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    # reuse an existing per-release record or start a fresh one
    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
        info = {'architecture': []}

    # accumulate architectures without duplicates
    if not st['Architecture'] in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # provenance of this record
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool location derived from the stanza's Filename field
    info['poolurl'] = '/'.join([os.path.dirname(st['Filename'])])

    # description: '%' escaped for later string interpolation;
    # first line is the short description, rest is the long one
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # charge the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    # 'sv' = source package name + version (falls back on binary name)
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']
def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Looks *codename* up in the 'release codenames' section of *cfg*.
    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    # unknown codename -- return unchanged (per the documented contract;
    # the original trailing return was not visible in review)
    return codename
def create_dir(path):
    """Create directory *path*, including any missing parents.

    Does nothing when the directory already exists.
    """
    if os.path.exists(path):
        return
    # os.makedirs replaces the original component-by-component
    # split/os.mkdir loop; behavior is identical for existing prefixes
    os.makedirs(path)
def dde_get(url, fail=False):
    """Query the DDE web service at *url* and return the parsed JSON
    payload (its 'r' key).

    *fail* marks the second (final) attempt so a repeated failure gives
    up instead of retrying again.  NOTE(review): the enclosing ``try:``,
    the delay call, and the fallback returns are on lines not visible.
    """
    # enforce delay to be friendly to DDE
    data = json.read(urllib2.urlopen(url+"?t=json").read())['r']
    print "SUCCESS:", url
    except urllib2.HTTPError, e:
        # service replied with an HTTP error -- no info for this query
        print "NOINFO:", url, type(e)
    except urllib2.URLError, e:
        print "URLERROR:", url, type(e)
        print "Permanant failure"
        print "Try again after 30 seconds..."
        # single retry; fail=True prevents endless recursion
        return dde_get(url, fail=True)
    except (StopIteration):
    except json.ReadException, e:
        # unparseable response -- UDD backend possibly down
        print "UDD-DOWN?:", url, type(e)
def nitrc_get(spec, fail=False):
    """Fetch the NITRC project-statistics JSON for project *spec*.

    *fail* marks the second (final) attempt.  NOTE(review): the
    enclosing ``try:`` and the fallback returns are on lines not
    visible here.
    """
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    # change into this from python 2.6 on
    #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
    data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
    print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError, e:
        print "NITRC-NOINFO:", spec, type(e)
    except urllib2.URLError, e:
        print "NITRC-URLERROR:", spec, type(e)
        print "Permanant failure"
        print "Try again after 30 seconds..."
        # single retry; fail=True prevents endless recursion
        return nitrc_get(spec, fail=True)
def parse_nitrc(data):
    """Reduce a NITRC projects JSON blob to aggregate statistics --
    currently the total download count across all package files.

    NOTE(review): the full ``nitrc_filtered`` dict literal spans lines
    not visible here.
    """
    # simplify -- there is only one project in the data
    project = data['projects'][0]
    nitrc_filtered = {'downloads': 0,
    # sum download counts over every file of every release of every pkg
    for pkg in project['packages']:
        for release in pkg['releases']:
            for file in release['files']:
                nitrc_filtered['downloads'] += file['download_count']
    return nitrc_filtered
def import_nitrc(cfg, db):
    """Attach NITRC statistics to db entries that have an id configured
    in the 'nitrc ids' config section.

    NOTE(review): the loop header over packages and the final return are
    on lines not visible here.
    """
        # skip packages without a configured NITRC id
        if not cfg.has_option("nitrc ids", p):
        nitrc_spec = cfg.get("nitrc ids", p)
        nitrc_data = nitrc_get(nitrc_spec)
        nitrc_excerpt = parse_nitrc(nitrc_data)
        if not nitrc_excerpt is None:
            db[p]['nitrc'] = nitrc_excerpt
def import_dde(cfg, db):
    """Enrich *db* from the DDE service: popcon data for Debian and
    Ubuntu plus per-release package info from UDD.

    NOTE(review): the per-package loop header, several guards, and the
    ``info`` initialization are on lines not visible here.
    """
    query_url = cfg.get('dde', 'pkgquery_url')
        q = dde_get(query_url + "/packages/all/%s" % p)

        # copy all stuff, while preserving non-overlapping information
        for k, v in q.iteritems():

        # get latest popcon info for debian and ubuntu
        # cannot use origin field itself, since it is none for few packages
        origin = q['drc'].split()[0]
        if origin == 'ubuntu':
            if q.has_key('popcon'):
                db[p]['main']['ubuntu_popcon'] = q['popcon']
            # if we have ubuntu, need to get debian
            q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
            if q and q.has_key('popcon'):
                db[p]['main']['debian_popcon'] = q['popcon']
        elif origin == 'debian':
            if q.has_key('popcon'):
                db[p]['main']['debian_popcon'] = q['popcon']
            # if we have debian, need to get ubuntu
            q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
            if q and q.has_key('popcon'):
                db[p]['main']['ubuntu_popcon'] = q['popcon']
            print("Ignoring unkown origin '%s' for package '%s'." \

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)

        # hold all info about this package per distribution release
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data for multiple over archs
            comp = apt.VersionCompare(cp['version'],
                                      info[distkey]['version'])
            # found another arch for the same version
                info[distkey]['architecture'].append(cp['architecture'])
            # found newer version, dump the old ones
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # simply ignore older versions

        # finally assign the new package data
        for k, v in info.iteritems():
def assure_unicode(s):
    """Assure that argument is unicode

    Necessary if strings are not carrying out Pythonish 'u' prefix to
    signal UTF8 strings, but are in fact UTF8.

    NOTE(review): the early ``return s``, the ``try:`` line and the
    str-branch condition are on lines not visible here.
    """
    # already unicode -- nothing to do
    if type(s) is unicode:
    # attempt regular unicode call and if fails -- just decode it
    except UnicodeDecodeError, e:
        # raw bytes that are actually UTF-8 encoded
        return s.decode('utf8')
        # non-string input: stringify first, then recurse
        return assure_unicode(str(s))
def convert_longdescr(ld):
    """Convert a Debian-control-style long description into ReST-safe
    text: paragraph separators become blank lines, embedded list items
    are kept, and ReST-active characters are escaped.

    NOTE(review): the split of *ld* into lines and the paragraph-marker
    condition are on lines not visible here.
    """
    # escape literal percents (output is later used for interpolation)
    ld = ld.replace('% ', '%% ')
    for i, l in enumerate(ld):
            # paragraph separator -- replaced by a marker, expanded below
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >=3 and l[:2] == ' ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    # rejoin, then expand the markers into blank-line paragraph breaks
    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
    # safe-guard ReST active symbols
    ld = re.sub(r'([\'`*])', r'\\\1', ld)
def underline_text(text, symbol):
    """Return *text* followed by a matching-length underline of *symbol*
    (ReST section-title style), each line newline-terminated."""
    return '%s\n%s\n' % (text, symbol * len(text))
def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
    """Render the ReST page for one package using jinja *template* and
    return the page text.

    NOTE(review): the ``pkgdb`` binding, the early return, and most of
    the ``template.render`` keyword arguments are on lines not visible.
    """
    # local binding for ease of use
    # do nothing if there is not at least the very basic stuff
    if not pkgdb['main'].has_key('description'):
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    title = underline_text(title, '*')

    # per-source-package extracts dir, named after the source part of 'sv'
    if 'sv' in pkgdb['main']:
        ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
        if not os.path.exists(ex_dir):
    page = template.render(
        long_description=convert_longdescr(
            assure_unicode(pkgdb['main']['long_description'])),
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
def store_db(db, filename):
    """Serialize the package database *db* to *filename* as
    pretty-printed Python literal text, UTF-8 encoded.

    Counterpart of ``read_db()``.
    """
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    # try/finally guarantees the handle is closed even if pformat or
    # write raises (no close was visible in the original)
    try:
        f.write(pp.pformat(db))
    finally:
        f.close()
def read_db(filename):
    """Load a package database previously written by ``store_db()``.

    The file holds a pretty-printed Python dict literal which is
    evaluated to reconstruct the dict.
    NOTE(review): the original trailing lines were not visible; this
    body reconstructs them from store_db()'s output format -- confirm.
    WARNING: eval() executes arbitrary code; only read trusted db files.
    """
    f = codecs.open(filename, 'r', 'utf-8')
    try:
        return eval(f.read())
    finally:
        f.close()
def write_sourceslist(jinja_env, cfg, outdir):
    """Generate apt sources.list snippets (one per release/mirror
    combination) under ``outdir/lists`` plus the ReST page listing them.

    NOTE(review): the ``repos`` initialization, 'continue' after the
    data-archive guard, and the list-file close are on lines not
    visible here.
    """
    create_dir(os.path.join(outdir, 'lists'))

    # one list file per (release, mirror) combination
    for release in cfg.options('release codenames'):
        if release == 'data':
            # no seperate list for the data archive
        transrel = trans_codename(release, cfg)
        for mirror in cfg.options('mirrors'):
            listname = '%s.%s' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, 'lists', listname), 'w')
            # each list serves the release itself plus the 'data' archive
            for rel in ('data', release):
                aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                lf.write('deb %s' % aptcfg)
                lf.write('#deb-src %s' % aptcfg)

    # lookup tables handed to the template
    id2codename = dict([(cfg.get('release backport ids', r), r)
                        for r in cfg.options('release codenames')])
    id2relname = dict([(cfg.get('release backport ids', r), trans_codename(r, cfg))
                       for r in cfg.options('release codenames')])
    mirror2name = dict([(m, cfg.get('mirror names', m))
                        for m in cfg.options('mirrors')])
    mirror2url = dict([(m, cfg.get('mirrors', m))
                       for m in cfg.options('mirrors')])
    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(id2codename=id2codename,
                                     id2relname=id2relname,
                                     mirror2name=mirror2name,
                                     mirror2url=mirror2url))
def sort_by_tasks(db):
    """Group package names by blend task.

    NOTE(review): the ``tasks`` dict initialization, the 'continue'
    guards, the ``taskname`` binding and the return are on lines not
    visible here.
    """
    for pkg in db.keys():
        if not 'blends' in db[pkg]:
            # no blend info for this package
        blendinfo = db[pkg]['blends']
        if not 'tasks' in blendinfo:
            # no task info in blend data
        taskinfo = blendinfo['tasks']
        for task in taskinfo:
            if not taskname in tasks:
            tasks[taskname].append(pkg)
def sort_by_maintainer(db):
    """Group package names by maintainer (email stripped).

    NOTE(review): the ``maints`` dict initialization, 'continue' guards,
    the dedup loop header and the return are on lines not visible here.
    """
    for pkg in db.keys():
        if not 'main' in db[pkg]:
            # no generic info for this package
        info = db[pkg]['main']
        if not 'maintainer' in info:
            # no maintainer info at all
        # prefer the original (upstream distro) maintainer when present
        if 'original_maintainer' in info and not info['original_maintainer'] is None:
            maint = info['original_maintainer']
            maint = info['maintainer']

        # no sane maintainer info

        # safeguard: <> confuses sphinx and we don't care about different emails
        maint = maint[:maint.find('<')].strip()
        if not maint in maints:
        maints[maint].append(pkg)

    # remove duplicate package names per maintainer
        maints[m] = np.unique(maints[m])
def sort_by_release(db):
    """Group package names by release.

    NOTE(review): the ``rels`` initialization, the loop over per-package
    sections (binding ``sec``/``relname``), and the return are on lines
    not visible here.
    """
    for pkg in db.keys():
        # release sections are keyed by tuples (see _store_pkg)
        if not isinstance(sec, tuple):
            # only releases are of interest
        if not relname in rels:
        rels[relname].append(pkg)
    # remove duplicate package names per release
        rels[r] = np.unique(rels[r])
def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
    """Write the package-list TOC pages and one ReST page per package
    under *outdir*.

    NOTE(review): several loop headers (over ``defs``, over packages)
    and some call arguments are on lines not visible here.
    """
    create_dir(os.path.join(outdir, 'pkgs'))
    create_dir(os.path.join(outdir, 'pkglists'))
    # template for individual package listings
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    # the high-level package list overview
    hltoc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    hltoc.write('.. _pkglists:\n\n')
    hltoc.write(underline_text('Software packages', '='))
    # (grouping dict, section title, per-group page title template)
    defs = [(sort_by_tasks(db), 'By purpose', 'Packages for %s'),
            (sort_by_release(db), 'By release', 'Packages for %s'),
            (sort_by_maintainer(db), 'By maintainer', 'Packages by %s')]
    # TOC for each thingie
        pkgsdict, sectitle, title_tmpl = def_
        hltoc.write(underline_text(sectitle, '-'))
        ids = pkgsdict.keys()
        for id_ in np.unique(ids):
            # sanitize grouping value into a usable ReST label
            label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
            toc = codecs.open(os.path.join(outdir,
            toc.write(toc_template.render(
                    title=underline_text(title_tmpl % id_, '='),
            hltoc.write('* :ref:`%s`\n' % label)

    # now a complete list of all packages
    hltoc.write(underline_text('Complete list', '-'))
    toc = codecs.open(os.path.join(outdir, 'pkglists', 'pkgs-all.rst'),
    toc.write(toc_template.render(label='full_pkg_list',
                title=underline_text('Complete package list', '='),
                pkgs=db.keys(), db=db))
    hltoc.write('* :ref:`full_pkg_list`\n')

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
        # when no page is available skip this package
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
def prepOptParser(op):
    """Register all command line options on OptionParser *op*."""
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    # database file to load/store
    op.add_option("--db",
                  action="store", type="string", dest="db",
                  help="Database file to read. Default: None")

    # repository configuration
    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  help="Repository config file.")

    # where the generated ReST output goes
    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  help="Target directory for ReST output. Default: None")

    # may be given multiple times (action="append")
    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",

    # directory with per-package addendum .rst files
    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None, help="None")

    # directory with per-source-package extracts
    op.add_option("--extracts", action="store", dest="extracts_dir",
                  type="string", default=None, help="None")
# -- main entry point: parse options, validate them, dispatch on command --
# NOTE(review): numerous lines (option validation guards, cfg/db loading,
# the 'cmd' binding, and the final __main__ body) are not visible here.
op = OptionParser(version="%prog 0.0.2")

(opts, args) = op.parse_args()

# exactly one positional command argument is expected
    print('There needs to be exactly one command')

# both --cfg and --db are mandatory for any command
    print("'--cfg' option is mandatory.")
    print("'--db' option is mandatory.")

# read the repository configuration
cfg = SafeConfigParser()

# load existing db, unless renew is requested
if cmd == 'updatedb':
    # seed db with packages named in configured task files
    if cfg.has_option('packages', 'select taskfiles'):
        db = add_pkgfromtaskfile(db, cfg.get('packages',
                                             'select taskfiles').split())

    # add additional package names from config file
    if cfg.has_option('packages', 'select names'):
        for p in cfg.get('packages', 'select names').split():
            if not db.has_key(p):
                db[p] = get_emptydbentry()

    # get info from task files
    if cfg.has_option('packages', 'prospective'):
        for url in cfg.get('packages', 'prospective').split():
            db = import_blendstask(cfg, db, url)

    # parse NeuroDebian repository
    if cfg.has_option('neurodebian', 'releases'):
        for rurl in cfg.get('neurodebian', 'releases').split():
            db = import_release(cfg, db, rurl)

    # collect package information from DDE
    db = import_dde(cfg, db)
    # get info from NITRC
    db = import_nitrc(cfg, db)
    # persist the updated database
    store_db(db, opts.db)

# load the db from file
db = read_db(opts.db)

# template engine looking up templates from the installed package
jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

# generate package pages and TOC and write them to files
write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)

write_sourceslist(jinja_env, cfg, opts.outdir)

if __name__ == "__main__":