2 """Tell me who you are!
7 from debian_bundle import deb822
9 # Lets first assure no guarding (but annoying) warnings
11 warnings.simplefilter('ignore', FutureWarning)
12 warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
15 from ConfigParser import SafeConfigParser
16 from optparse import OptionParser, Option, OptionGroup, OptionConflictError
28 from jinja2 import Environment, PackageLoader
30 from pprint import PrettyPrinter
class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
                 ro_cachedirs=None):
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
        else:
            self.ro_cachedirs = []

        # ensure the cache dir exists
        create_dir(self.cachedir)
    def get(self, url, update=False):
        """Look in the cache for the file and return the cached copy.
        Otherwise it is downloaded first.

        Knows how to deal with http:// and svn:// URLs.
        """
        # look whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
        else:
            # assume not compressed
            target_url = url
            cext = None

        # turn url into a filename -- mimic what APT does for
        # /var/lib/apt/lists/
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
        if update:
            cfilename = os.path.join(self.cachedir, tfilename)
        else:
            # look for the uncompressed file anywhere in the cache
            cfilename = None
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)
                    break
            if cfilename is None:
                # not found anywhere -- will go into the rw cache
                cfilename = os.path.join(self.cachedir, tfilename)

        # if update needed -- download
        if update or not os.path.exists(cfilename):
            #print 'Caching file from %s' % url

            if url.startswith('svn://'):
                # export from SVN
                pysvn.Client().export(url, cfilename)
            if url.startswith('http://'):
                # download the file
                tempfile, ignored = urllib.urlretrieve(url)

                # decompress
                decompressor = None
                if cext == 'bz2':
                    decompressor = 'bzip2'
                elif cext == 'gz':
                    decompressor = 'gzip'
                elif cext is not None:
                    raise ValueError(
                        "Don't know how to decompress %s files" % cext)

                if decompressor is not None:
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                                        tempfile]) != 0:
                        raise RuntimeError(
                            "Something went wrong while decompressing '%s'"
                            % tempfile)

                    # move decompressed file into cache
                    shutil.move(os.path.splitext(tempfile)[0], cfilename)
                else:
                    # uncompressed -- just move into the cache
                    shutil.move(tempfile, cfilename)

        # XXX do we need that if explicit filename is provided?
        fh = codecs.open(cfilename, 'r', 'utf-8')

        return fh
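
# Example (a sketch, not executed; the URL is only illustrative): pull an apt
# Packages file through the cache. The first call downloads and decompresses,
# later calls reuse the copy under build/cache/.
#
#   cache = AptListsCache()
#   fh = cache.get('http://neuro.debian.net/debian/dists/sid/main/'
#                  'binary-i386/Packages.bz2')
#   for stanza in deb822.Packages.iter_paragraphs(fh):
#       print stanza['Package']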
def add_pkgfromtaskfile(db, urls):
    cache = AptListsCache()
    pkgs = []

    for url in urls:
        fh = cache.get(url)

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if stanza.has_key('Depends'):
                pkg = stanza['Depends']
            elif stanza.has_key('Recommends'):
                pkg = stanza['Recommends']
            elif stanza.has_key('Suggests'):
                pkg = stanza['Suggests']
            else:
                continue

            # account for multiple packages per line
            if pkg.count(','):
                pkgs += [p.strip() for p in pkg.split(',')]
            else:
                pkgs.append(pkg.strip())

    for p in pkgs:
        if not db.has_key(p):
            db[p] = get_emptydbentry()

    return db
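
# For reference, a minimal task-file stanza as consumed above (content is
# illustrative; real files come from the Blends' task pages):
#
#   Task: Imaging
#   ...
#   Depends: pkg-a, pkg-b
#
# Every package named in Depends/Recommends/Suggests becomes a db key with an
# empty entry from get_emptydbentry().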
def get_emptydbentry():
    return {'main': {}}
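
# The db is a plain nested dict, assembled by the importers below (keys shown
# with illustrative values; per-release keys are added by _store_pkg and
# import_dde):
#
#   db = {'pkg-name': {
#             'main': {'description': ..., 'long_description': ...,
#                      'homepage': ..., 'sv': 'srcpkg version', ...},
#             ('Datasets (data)', 'neurodebian-data'): {...release info...},
#             'blends': {'tasks': [...], ...},
#             'nitrc': {'downloads': ...}}}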
def import_blendstask(cfg, db, url):
    cache = AptListsCache()
    fh = cache.get(url)
    task_name = None

    # figure out blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
    else:
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
            pkg = st['Depends']
        elif st.has_key('Recommends'):
            pkg = st['Recommends']
        elif st.has_key('Suggests'):
            pkg = st['Suggests']
        else:
#            print 'Warning: Cannot determine name of prospective package ' \
#                  '... ignoring. Dump follows:'
#            print st
            continue

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
                continue

            info = {}

            # blends info
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            # pkg description
            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                        u'\n'.join(descr[1:])

                # populate the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            # publications
            if st.has_key('Published-Title'):
                title = st['Published-Title']
                if title[-1] == '.':
                    # strip the trailing dot -- it is added back later
                    pub = {'title': title[:-1]}
                else:
                    pub = {'title': title}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']

                db[p]['main']['publication'] = pub

            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
            else:
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

            # handle pkg name aliases
            if p in cfg.options('blend package aliases'):
                src_entry = db[p].copy()
                # remove the original entry
                del db[p]
                # copy the entry into all aliases
                for alias in cfg.get('blend package aliases', p).split():
                    print "Aliasing %s to %s" % (p, alias)
                    db[alias] = copy.deepcopy(src_entry)

    return db
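
# The 'blend package aliases' config section maps a blend's package name onto
# one or more names used in NeuroDebian, e.g. (package names are hypothetical):
#
#   [blend package aliases]
#   python-nifti = python-pynifti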
def get_releaseinfo(rurl):
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    label = rp['Label']
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([rp['Label'], rp['Codename']])

    # cleanup
    release_file.close()

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
            'origin': origin}
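
# A sketch of what get_releaseinfo() returns for a Release file (all values
# are illustrative only):
#
#   {'baseurl': 'http://neuro.debian.net/debian/dists/sid',
#    'archs': ['amd64', 'i386'],
#    'components': ['main', 'contrib', 'non-free'],
#    'codename': 'sid', 'label': 'NeuroDebian',
#    'labelcode': 'NeuroDebian_sid', 'origin': 'NeuroDebian'}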
def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])
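
# e.g. build_pkgsurl('http://neuro.debian.net/debian/dists/sid', 'main', 'i386')
# -> 'http://neuro.debian.net/debian/dists/sid/main/binary-i386/Packages.bz2'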
def import_release(cfg, db, rurl):
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            # parse
            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'],
                                ri['codename'], c, ri['baseurl'])

            # cleanup
            packages_file.close()

    return db
def _store_pkg(cfg, db, st, origin, codename, component, baseurl):
    pkg = st['Package']

    # only care for known packages
    if not db.has_key(pkg):
#        print 'Ignoring NeuroDebian package "%s"' % pkg
        return db

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
    else:
        info = {'architecture': []}

    # fill in data
    if st['Architecture'] not in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    # origin
    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    # pool url
    info['poolurl'] = os.path.dirname(st['Filename'])

    # pkg description
    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # populate the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
    else:
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']

    return db
def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    """
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
    else:
        return codename
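
# Backed by the 'release codenames' config section, e.g. (values illustrative,
# except for 'data', whose translation is also used as the dataset marker in
# write_pkgpages() and is skipped in write_sourceslist()):
#
#   [release codenames]
#   sid = Debian testing (sid)
#   data = Datasets (data)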
def create_dir(path):
    if os.path.exists(path):
        return

    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
            os.mkdir(p)
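
# Roughly equivalent to os.makedirs(path), except that an already existing
# path is silently accepted (os.makedirs would raise OSError in that case).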
def dde_get(url, fail=False):
    # enforce delay to be friendly to DDE
    time.sleep(1)
    try:
        data = json.read(urllib2.urlopen(url + "?t=json").read())['r']
        print "SUCCESS:", url
        return data
    except urllib2.HTTPError, e:
        print "NOINFO:", url, type(e)
    except urllib2.URLError, e:
        print "URLERROR:", url, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return dde_get(url, fail=True)
    except StopIteration:
        print "NOINFO:", url
    except json.ReadException, e:
        print "UDD-DOWN?:", url, type(e)
    return False
def nitrc_get(spec, fail=False):
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
    try:
        # change into this from python 2.6 on
        #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError, e:
        print "NITRC-NOINFO:", spec, type(e)
        return False
    except urllib2.URLError, e:
        print "NITRC-URLERROR:", spec, type(e)
        if fail:
            print "Permanent failure"
            return False
        print "Try again after 30 seconds..."
        time.sleep(30)
        return nitrc_get(spec, fail=True)
    return data
def parse_nitrc(data):
    # nitrc_get() returns False on failure
    if data is False:
        return None
    # simplify -- there is only one project in the data
    project = data['projects'][0]
    nitrc_filtered = {'downloads': 0}
    for pkg in project['packages']:
        for release in pkg['releases']:
            for f in release['files']:
                nitrc_filtered['downloads'] += f['download_count']
    return nitrc_filtered
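
# The NITRC export nests download counters as
# projects -> packages -> releases -> files -> download_count;
# parse_nitrc() merely sums them into a single 'downloads' figure.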
def import_nitrc(cfg, db):
    for p in db.keys():
        if not cfg.has_option("nitrc ids", p):
            continue
        nitrc_spec = cfg.get("nitrc ids", p)
        nitrc_data = nitrc_get(nitrc_spec)
        nitrc_excerpt = parse_nitrc(nitrc_data)
        if nitrc_excerpt is not None:
            db[p]['nitrc'] = nitrc_excerpt
    return db
def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')
    for p in db.keys():
        # get freshest info
        q = dde_get(query_url + "/packages/all/%s" % p)
        if q:
            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():
                db[p]['main'][k] = v
            # get latest popcon info for debian and ubuntu
            # cannot use origin field itself, since it is none for few packages
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
            else:
                print("Ignoring unknown origin '%s' for package '%s'."
                      % (origin, p))

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)
        if not q:
            continue
        # hold all info about this package per distribution release
        info = {}
        for cp in q:
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                info[distkey] = cp
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
            else:
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                if comp == 0:
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                elif comp > 0:
                    info[distkey] = cp
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions

        # finally assign the new package data
        for k, v in info.iteritems():
            db[p][k] = v

    return db
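
# A resulting distkey looks like, e.g. (illustrative):
#   ('Debian testing (sid)', 'debian-sid')
# i.e. the same tuple format _store_pkg() uses for the NeuroDebian repository
# ('neurodebian-<codename>').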
def assure_unicode(s):
    """Assure that argument is unicode

    Necessary for strings that do not carry the Pythonish 'u' prefix
    signalling unicode, but are in fact UTF-8 encoded.
    """
    if type(s) is unicode:
        return s
    elif type(s) is str:
        # attempt regular unicode call and if that fails -- just decode it
        try:
            return unicode(s)
        except UnicodeDecodeError, e:
            return s.decode('utf8')
    else:
        return assure_unicode(str(s))
def convert_longdescr(ld):
    ld = ld.replace('% ', '%% ')
    ld = ld.split('\n')
    for i, l in enumerate(ld):
        if l == ' .':
            # paragraph separator
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >= 3 and l[:2] == '  ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # cleanup any leftover (e.g. trailing) markers
    ld = ld.replace('#NEWLINEMARKER#', '')
    # safe-guard ReST active symbols
    ld = re.sub(r'([\'`*])', r'\\\1', ld)
    return ld
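
# Sketch of the transformation: in a Debian long description
#
#    first paragraph
#    .
#    second paragraph
#
# the ' .' separator line becomes a paragraph break ('\n\n'), embedded list
# items (indented lines starting with '-' or '*') get paragraphs of their own,
# and ReST-active characters (', `, *) are escaped.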
def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
    # local binding for ease of use
    pkgdb = db[pkg]
    # do nothing if there is not at least the very basic stuff
    if not pkgdb['main'].has_key('description'):
        return
    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    underline = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (underline, title, underline)

    ex_dir = None
    if 'sv' in pkgdb['main']:
        ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
        if not os.path.exists(ex_dir):
            ex_dir = None
    page = template.render(
        pkg=pkg,
        title=title,
        long_description=convert_longdescr(
            assure_unicode(pkgdb['main']['long_description'])),
        cfg=cfg,
        db=pkgdb,
        extracts_dir=ex_dir)
    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
    return page
def store_db(db, filename):
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
    f.close()


def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
    # the db is stored as a pprint()ed dict literal -- evaluate it back
    db = eval(f.read())
    return db
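
# Note: since the db file is just a pprint()ed Python dict, read_db() trusts
# its input (eval). Round-trip sketch (filename is hypothetical):
#
#   store_db({'somepkg': {'main': {}}}, 'pkgs.db')
#   db = read_db('pkgs.db')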
def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, '_static'))

    repos = {}
    for release in cfg.options('release codenames'):
        if release == 'data':
            # no separate list for the data archive
            continue
        transrel = trans_codename(release, cfg)
        repos[transrel] = []
        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            for rel in ('data', release):
                aptcfg = '%s %s main contrib non-free\n' \
                         % (cfg.get('mirrors', mirror), rel)
                lf.write('deb %s' % aptcfg)
                lf.write('#deb-src %s' % aptcfg)
            lf.close()

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
    sl.close()
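
# Each generated sources.list covers the data archive plus the release itself,
# e.g. (the mirror URL is illustrative):
#
#   deb http://neuro.debian.net/debian data main contrib non-free
#   #deb-src http://neuro.debian.net/debian data main contrib non-free
#   deb http://neuro.debian.net/debian sid main contrib non-free
#   #deb-src http://neuro.debian.net/debian sid main contrib non-free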
def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
    create_dir(outdir)
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    # this is a fragile test
    toc.write(toc_template.render(
        pkgs=[k for k in db.keys()
              if not ('Datasets (data)', 'neurodebian-data') in db[k]]))
    toc.close()

    # and now only for dataset packages
    toc_template = jinja_env.get_template('datasets_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'datasets.rst'), 'w', 'utf-8')
    # this is a fragile test
    toc.write(toc_template.render(
        pkgs=[k for k in db.keys()
              if ('Datasets (data)', 'neurodebian-data') in db[k]]))
    toc.close()

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
    for p in db.keys():
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir,
                                extracts_dir)
        # when no page is available skip this package
        if page is None:
            continue
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
        pf.write(page)
        pf.close()
def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  default=None,
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  default=None,
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  default=None,
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",
                  help="None")

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None, help="None")

    op.add_option("--extracts", action="store", dest="extracts_dir",
                  type="string", default=None, help="None")
def main():
    op = OptionParser(version="%prog 0.0.2")
    prepOptParser(op)

    (opts, args) = op.parse_args()

    if len(args) != 1:
        print('There needs to be exactly one command')
        sys.exit(1)

    cmd = args[0]

    if opts.cfg is None:
        print("'--cfg' option is mandatory.")
        sys.exit(1)
    if opts.db is None:
        print("'--db' option is mandatory.")
        sys.exit(1)

    cfg = SafeConfigParser()
    cfg.read(opts.cfg)

    # load existing db, unless renew is requested
    if cmd == 'updatedb':
        db = {}

        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if not db.has_key(p):
                    db[p] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(cfg, db, url)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # get info from NITRC
        db = import_nitrc(cfg, db)
        # store the updated db
        store_db(db, opts.db)
        return

    # load the db from file
    db = read_db(opts.db)

    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir,
                   opts.extracts_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)


if __name__ == "__main__":
    main()
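
# Typical invocations (a sketch; paths and the page-generation command name
# are only illustrative -- any command other than 'updatedb' triggers page
# generation from an existing db file):
#
#   ./build.py --cfg neurodebian.cfg --db pkgs.db updatedb
#   ./build.py --cfg neurodebian.cfg --db pkgs.db -o build/src mkpages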