"""Tell me who you are!
from debian_bundle import deb822
# Let's first suppress the guarding (but annoying) warnings
warnings.simplefilter('ignore', FutureWarning)
warnings.filterwarnings('ignore', 'Module debian_bundle was already imported.*', UserWarning)
from ConfigParser import SafeConfigParser
from optparse import OptionParser, Option, OptionGroup, OptionConflictError
from jinja2 import Environment, PackageLoader
from pprint import PrettyPrinter
class AptListsCache(object):
    def __init__(self, cachedir='build/cache',
        self.cachedir = cachedir

        if ro_cachedirs is not None:
            self.ro_cachedirs = ro_cachedirs
            self.ro_cachedirs = []

        create_dir(self.cachedir)
    def get(self, url, update=False):
        """Looks in the cache whether the file is there and takes the cached one.
        Otherwise it is downloaded first.

        Knows how to deal with http:// and svn:// URLs.
        # check whether it is compressed
        cext = url.split('.')[-1]
        if cext in ['gz', 'bz2']:
            target_url = url[:-1 * len(cext) - 1]
            # assume not compressed

        # turn url into a filename -- mimic what APT does for
        tfilename = '_'.join(target_url.split('/')[2:])

        # if we need to download anyway do not search
            cfilename = os.path.join(self.cachedir, tfilename)
            # look for the uncompressed file anywhere in the cache
            for cp in [self.cachedir] + self.ro_cachedirs:
                if os.path.exists(os.path.join(cp, tfilename)):
                    cfilename = os.path.join(cp, tfilename)
            cfilename = os.path.join(self.cachedir, tfilename)

        # if update needed -- download
            #print 'Caching file from %s' % url
            if url.startswith('svn://'):
                pysvn.Client().export(url, cfilename)
            if url.startswith('http://'):
                tempfile, ignored = urllib.urlretrieve(url)
                    decompressor = 'bzip2'
                          "Don't know how to decompress %s files" \
                if decompressor is not None:
                    if subprocess.call([decompressor, '-d', '-q', '-f',
                        raise RuntimeError, \
                              "Something went wrong while decompressing '%s'" \
                # move decompressed file into cache
                shutil.move(os.path.splitext(tempfile)[0], cfilename)

        # XXX do we need that if explicit filename is provided?
        fh = codecs.open(cfilename, 'r', 'utf-8')
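# A minimal usage sketch for the cache above (the URL and cache directory are
# illustrative, not taken from the original script): fetch a possibly
# bz2-compressed Packages file and get a handle on the uncompressed cached copy.
#
#   cache = AptListsCache(cachedir='build/cache')
#   fh = cache.get('http://neuro.debian.net/debian/dists/sid/main/'
#                  'binary-i386/Packages.bz2')
#   for stanza in deb822.Packages.iter_paragraphs(fh):
#       print stanza['Package']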
def add_pkgfromtaskfile(db, urls):
    cache = AptListsCache()

        # loop over all stanzas
        for stanza in deb822.Packages.iter_paragraphs(fh):
            if stanza.has_key('Depends'):
                pkg = stanza['Depends']
            elif stanza.has_key('Recommends'):
                pkg = stanza['Recommends']
            elif stanza.has_key('Suggests'):
                pkg = stanza['Suggests']

            # account for multiple packages per line
                pkgs += [p.strip() for p in pkg.split(',')]
                pkgs.append(pkg.strip())

                if not db.has_key(p):
                    db[p] = get_emptydbentry()
def get_emptydbentry():
def import_blendstask(cfg, db, url):
    cache = AptListsCache()

    # figure out blend's task page URL, since they differ from blend to blend
    urlsec = url.split('/')
    blendname = urlsec[-3]
    if blendname == 'debian-med':
        taskpage_url = 'http://debian-med.alioth.debian.org/tasks/'
    elif blendname == 'debian-science':
        taskpage_url = 'http://blends.alioth.debian.org/science/tasks/'
        raise ValueError('Unknown blend "%s"' % blendname)
    taskpage_url += urlsec[-1]

    for st in deb822.Packages.iter_paragraphs(fh):
        if st.has_key('Task'):
            task_name = st['Task']
            task = (blendname, task_name, taskpage_url)

        if st.has_key('Depends'):
        elif st.has_key('Recommends'):
            pkg = st['Recommends']
        elif st.has_key('Suggests'):
            # print 'Warning: Cannot determine name of prospective package ' \
            #       '... ignoring. Dump follows:'

        # take care of pkg lists
        for p in pkg.split(', '):
            if not db.has_key(p):
                print 'Ignoring blend package "%s"' % p
            info['tasks'] = [task]
            if st.has_key('License'):
                info['license'] = st['License']
            if st.has_key('Responsible'):
                info['responsible'] = st['Responsible']

            if st.has_key('Pkg-Description'):
                descr = st['Pkg-Description'].split('\n')
                info['description'] = descr[0].strip()
                info['long_description'] = \
                    u'\n'.join(descr[1:])

                # charge the basic property set
                db[p]['main']['description'] = info['description']
                db[p]['main']['long_description'] = info['long_description']
            if st.has_key('WNPP'):
                db[p]['main']['debian_itp'] = st['WNPP']
            if st.has_key('Pkg-URL'):
                db[p]['main']['other_pkg'] = st['Pkg-URL']
            if st.has_key('Homepage'):
                db[p]['main']['homepage'] = st['Homepage']

            if st.has_key('Published-Title'):
                title = st['Published-Title']
                    # strip trailing dot -- added back later
                    pub = {'title': title[:-1]}
                    pub = {'title': title}
                if st.has_key('Published-Authors'):
                    pub['authors'] = st['Published-Authors']
                if st.has_key('Published-Year'):
                    pub['year'] = st['Published-Year']
                if st.has_key('Published-In'):
                    pub['in'] = st['Published-In']
                if st.has_key('Published-URL'):
                    pub['url'] = st['Published-URL']
                if st.has_key('Published-DOI'):
                    pub['doi'] = st['Published-DOI']
                    # need at least one URL
                    if not pub.has_key('url'):
                        pub['url'] = "http://dx.doi.org/%s" % st['Published-DOI']

                db[p]['main']['publication'] = pub
            if st.has_key('Registration'):
                db[p]['main']['registration'] = st['Registration']

            if st.has_key('Remark'):
                # prepend a single space to make it look like a long description
                info['remark'] = convert_longdescr(' ' + st['Remark'])

            # only store if there isn't something already
            if not db[p].has_key('blends'):
                db[p]['blends'] = info
                # just add this task's name and id
                db[p]['blends']['tasks'].append(task)

            # handle pkg name aliases
            if p in cfg.options('blend package aliases'):
                src_entry = db[p].copy()
                # remove original entry
                # copy the entry into all aliases
                for alias in cfg.get('blend package aliases', p).split():
                    print "Aliasing %s to %s" % (p, alias)
                    db[alias] = copy.deepcopy(src_entry)
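# For orientation, a made-up task file stanza of the kind parsed above; the
# field names are the ones import_blendstask() checks for, the values are
# purely illustrative:
#
#   Depends: somepkg
#   Homepage: http://example.org/somepkg
#   Pkg-Description: short synopsis of somepkg
#    A longer description continues on indented lines.
#   Published-Title: Some paper title.
#   Published-DOI: 10.1000/xyz123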
def get_releaseinfo(rurl):
    cache = AptListsCache()
    # root URL of the repository
    baseurl = '/'.join(rurl.split('/')[:-1])
    # get the release file from the cache
    release_file = cache.get(rurl)

    # create parser instance
    rp = deb822.Release(release_file)

    # architectures on this dist
    archs = rp['Architectures'].split()
    components = rp['Components'].split()
    # compile a new codename that also considers the repository label
    # to distinguish between official and unofficial repos.
    origin = rp['Origin']
    codename = rp['Codename']
    labelcode = '_'.join([rp['Label'], rp['Codename']])

    return {'baseurl': baseurl, 'archs': archs, 'components': components,
            'codename': codename, 'label': label, 'labelcode': labelcode,
def build_pkgsurl(baseurl, component, arch):
    return '/'.join([baseurl, component, 'binary-' + arch, 'Packages.bz2'])
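# For example (hypothetical arguments):
#   build_pkgsurl('http://neuro.debian.net/debian/dists/sid', 'main', 'amd64')
# returns
#   'http://neuro.debian.net/debian/dists/sid/main/binary-amd64/Packages.bz2'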
def import_release(cfg, db, rurl):
    cache = AptListsCache()

    ri = get_releaseinfo(rurl)

    # compile the list of Packages files to parse and parse them
    for c in ri['components']:
        for a in ri['archs']:
            # compile packages URL
            pkgsurl = build_pkgsurl(ri['baseurl'], c, a)

            # retrieve from cache
            packages_file = cache.get(pkgsurl)

            for stanza in deb822.Packages.iter_paragraphs(packages_file):
                db = _store_pkg(cfg, db, stanza, ri['origin'], ri['codename'], c, ri['baseurl'])

            packages_file.close()
def _store_pkg(cfg, db, st, origin, codename, component, baseurl):

    # only care for known packages
    if not db.has_key(pkg):
        # print 'Ignoring NeuroDebian package "%s"' % pkg

    distkey = (trans_codename(codename, cfg), 'neurodebian-' + codename)

    if db[pkg].has_key(distkey):
        info = db[pkg][distkey]
        info = {'architecture': []}

    if st['Architecture'] not in info['architecture']:
        info['architecture'].append(st['Architecture'])
    info['maintainer'] = st['Maintainer']
    if st.has_key('Homepage'):
        info['homepage'] = st['Homepage']
    info['version'] = st['Version']

    info['distribution'] = origin
    info['release'] = codename
    info['component'] = component

    info['poolurl'] = '/'.join([os.path.dirname(st['Filename'])])

    descr = st['Description'].replace('%', '%%').split('\n')
    info['description'] = descr[0].strip()
    info['long_description'] = u'\n'.join(descr[1:])

    db[pkg][distkey] = info

    # charge the basic property set
    db[pkg]['main']['description'] = info['description']
    db[pkg]['main']['long_description'] = info['long_description']
    if st.has_key('Source'):
        db[pkg]['main']['sv'] = "%s %s" % (st['Source'], st['Version'])
        db[pkg]['main']['sv'] = "%s %s" % (st['Package'], st['Version'])
    if st.has_key('Homepage'):
        db[pkg]['main']['homepage'] = st['Homepage']
    if st.has_key('Recommends'):
        db[pkg]['main']['recommends'] = st['Recommends']
def trans_codename(codename, cfg):
    """Translate a known codename into a release description.

    Unknown codenames will simply be returned as is.
    # if we know something, tell
    if codename in cfg.options('release codenames'):
        return cfg.get('release codenames', codename)
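# The codename translations come from the repository config file; a sketch of
# the expected section (the entries are illustrative, the real ones are read
# from the file passed via --cfg):
#
#   [release codenames]
#   sid = Debian testing/unstable (sid)
#   squeeze = Debian GNU/Linux 6.0 (squeeze)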
def create_dir(path):
    if os.path.exists(path):

    ps = path.split(os.path.sep)

    for i in range(1, len(ps) + 1):
        p = os.path.sep.join(ps[:i])

        if not os.path.exists(p):
def dde_get(url, fail=False):
    # enforce delay to be friendly to DDE
        data = json.read(urllib2.urlopen(url + "?t=json").read())['r']
        print "SUCCESS:", url
    except urllib2.HTTPError, e:
        print "NOINFO:", url, type(e)
    except urllib2.URLError, e:
        print "URLERROR:", url, type(e)
            print "Permanent failure"
        print "Try again after 30 seconds..."
        return dde_get(url, fail=True)
    except (StopIteration):
    except json.ReadException, e:
        print "UDD-DOWN?:", url, type(e)
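# Sketch of how dde_get() is meant to be called (base URL and package name are
# illustrative; the real base comes from the 'pkgquery_url' config option):
#
#   q = dde_get('http://dde.example.org/q/udd' + '/packages/all/somepkg')
#   if q:
#       print q.get('popcon')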
def nitrc_get(spec, fail=False):
    nitrc_url = 'http://www.nitrc.org/export/site/projects.json.php'
        # switch to this from Python 2.6 on
        #data = json.loads(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        data = json.read(urllib2.urlopen(nitrc_url + '?spec=%s' % spec).read())
        print "NITRC-SUCCESS:", spec
    except urllib2.HTTPError, e:
        print "NITRC-NOINFO:", spec, type(e)
    except urllib2.URLError, e:
        print "NITRC-URLERROR:", spec, type(e)
            print "Permanent failure"
        print "Try again after 30 seconds..."
        return nitrc_get(spec, fail=True)
def parse_nitrc(data):
    # simplify -- there is only one project in the data
    project = data['projects'][0]
    nitrc_filtered = {'downloads': 0,
    for pkg in project['packages']:
        for release in pkg['releases']:
            for file in release['files']:
                nitrc_filtered['downloads'] += file['download_count']
    return nitrc_filtered
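# The two NITRC helpers combine like this (project spec and numbers are
# illustrative only):
#
#   data = nitrc_get('some-nitrc-project-id')
#   if data:
#       stats = parse_nitrc(data)   # e.g. {'downloads': 1234, ...}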
def import_nitrc(cfg, db):
        if not cfg.has_option("nitrc ids", p):
        nitrc_spec = cfg.get("nitrc ids", p)
        nitrc_data = nitrc_get(nitrc_spec)
        nitrc_excerpt = parse_nitrc(nitrc_data)
        if nitrc_excerpt is not None:
            db[p]['nitrc'] = nitrc_excerpt
def import_dde(cfg, db):
    query_url = cfg.get('dde', 'pkgquery_url')

        q = dde_get(query_url + "/packages/all/%s" % p)

            # copy all stuff, while preserving non-overlapping information
            for k, v in q.iteritems():

            # get latest popcon info for debian and ubuntu
            # cannot use origin field itself, since it is none for few packages
            origin = q['drc'].split()[0]
            if origin == 'ubuntu':
                if q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                # if we have ubuntu, need to get debian
                q = dde_get(query_url + "/packages/prio-debian-sid/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
            elif origin == 'debian':
                if q.has_key('popcon'):
                    db[p]['main']['debian_popcon'] = q['popcon']
                # if we have debian, need to get ubuntu
                q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
                if q and q.has_key('popcon'):
                    db[p]['main']['ubuntu_popcon'] = q['popcon']
                print("Ignoring unknown origin '%s' for package '%s'." \

        # now get info for package from all releases in UDD
        q = dde_get(query_url + "/dist/p:%s" % p)

        # hold all info about this package per distribution release
            distkey = (trans_codename(cp['release'], cfg),
                       "%s-%s" % (cp['distribution'], cp['release']))
            if not info.has_key(distkey):
                # turn into a list to append others later
                info[distkey]['architecture'] = [info[distkey]['architecture']]
            # accumulate data over multiple archs
                comp = apt.VersionCompare(cp['version'],
                                          info[distkey]['version'])
                # found another arch for the same version
                    info[distkey]['architecture'].append(cp['architecture'])
                # found newer version, dump the old ones
                    # turn into a list to append others later
                    info[distkey]['architecture'] = [info[distkey]['architecture']]
                # simply ignore older versions

        # finally assign the new package data
        for k, v in info.iteritems():
def assure_unicode(s):
    """Assure that argument is unicode

    Necessary if strings do not carry the Pythonish 'u' prefix to
    signal UTF-8 strings, but are in fact UTF-8.
    if type(s) is unicode:
        # attempt regular unicode call and if it fails -- just decode it
        except UnicodeDecodeError, e:
            return s.decode('utf8')
        return assure_unicode(str(s))
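# For instance, assure_unicode('caf\xc3\xa9') (UTF-8 bytes) comes back as
# u'caf\xe9', while an argument that is already unicode is returned unchanged.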
def convert_longdescr(ld):
    ld = ld.replace('% ', '%% ')
    for i, l in enumerate(ld):
            ld[i] = ' #NEWLINEMARKER#'
        # look for embedded lists
        elif len(l) >= 3 and l[:2] == '  ' and l[2] in '-*':
            ld[i] = ' #NEWLINEMARKER# ' + l[2:]

    ld = u' '.join([l[1:] for l in ld])
    ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
    # clean up any leftover (e.g. trailing markers)
    ld = ld.replace('#NEWLINEMARKER#', '')
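# Roughly, a Debian-style long description such as
#   " Line one\n line two\n .\n Next paragraph"
# is turned into
#   "Line one line two\n\nNext paragraph"
# i.e. ' .' separator lines become blank lines and embedded '-'/'*' list
# items are pushed onto paragraphs of their own.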
def generate_pkgpage(pkg, cfg, db, template, addenum_dir, extracts_dir):
    # local binding for ease of use

    # do nothing if there is not at least the very basic stuff
    if not pkgdb['main'].has_key('description'):

    title = '**%s** -- %s' % (pkg, pkgdb['main']['description'])
    underline = '*' * (len(title) + 2)
    title = '%s\n %s\n%s' % (underline, title, underline)

    if 'sv' in pkgdb['main']:
        ex_dir = os.path.join(extracts_dir, pkgdb['main']['sv'].split()[0])
        if not os.path.exists(ex_dir):

    page = template.render(
        long_description=convert_longdescr(
            assure_unicode(pkgdb['main']['long_description'])),

    # the following can be replaced by something like
    # {% include "sidebar.html" ignore missing %}
    # in the template whenever jinja 2.2 becomes available
    addenum = os.path.join(os.path.abspath(addenum_dir), '%s.rst' % pkg)
    if os.path.exists(addenum):
        page += '\n\n.. include:: %s\n' % addenum
def store_db(db, filename):
    pp = PrettyPrinter(indent=2)
    f = codecs.open(filename, 'w', 'utf-8')
    f.write(pp.pformat(db))
def read_db(filename):
    f = codecs.open(filename, 'r', 'utf-8')
def write_sourceslist(jinja_env, cfg, outdir):
    create_dir(os.path.join(outdir, '_static'))

    for release in cfg.options('release codenames'):
        if release == 'data':
            # no separate list for the data archive
        transrel = trans_codename(release, cfg)

        for mirror in cfg.options('mirrors'):
            listname = 'neurodebian.%s.%s.sources.list' % (release, mirror)
            repos[transrel].append((mirror, listname))
            lf = open(os.path.join(outdir, '_static', listname), 'w')
            for rel in ('data', release):
                aptcfg = '%s %s main contrib non-free\n' % (cfg.get('mirrors', mirror),
                lf.write('deb %s' % aptcfg)
                lf.write('#deb-src %s' % aptcfg)

    srclist_template = jinja_env.get_template('sources_lists.rst')
    sl = open(os.path.join(outdir, 'sources_lists'), 'w')
    sl.write(srclist_template.render(repos=repos))
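# Each generated sources.list contains lines of this form (mirror URL and
# release codename are illustrative):
#
#   deb http://neuro.debian.net/debian data main contrib non-free
#   deb http://neuro.debian.net/debian sid main contrib non-free
#   #deb-src http://neuro.debian.net/debian sid main contrib non-free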
def write_pkgpages(jinja_env, cfg, db, outdir, addenum_dir, extracts_dir):
    create_dir(os.path.join(outdir, 'pkgs'))

    # generate the TOC with all packages
    toc_template = jinja_env.get_template('pkgs_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'pkgs.rst'), 'w', 'utf-8')
    # this is a fragile test
    toc.write(toc_template.render(
        pkgs=[k for k in db.keys()
              if ('Datasets', 'neurodebian-data') not in db[k]]))

    # and now only for dataset packages
    toc_template = jinja_env.get_template('datasets_toc.rst')
    toc = codecs.open(os.path.join(outdir, 'datasets.rst'), 'w', 'utf-8')
    # this is a fragile test
    toc.write(toc_template.render(
        pkgs=[k for k in db.keys()
              if ('Datasets', 'neurodebian-data') in db[k]]))

    # and now each individual package page
    pkg_template = jinja_env.get_template('pkg.rst')
        page = generate_pkgpage(p, cfg, db, pkg_template, addenum_dir, extracts_dir)
        # when no page is available skip this package
        pf = codecs.open(os.path.join(outdir, 'pkgs', p + '.rst'), 'w', 'utf-8')
def prepOptParser(op):
    # use module docstring for help output
    op.usage = "%s [OPTIONS]\n\n" % sys.argv[0] + __doc__

    op.add_option("--db",
                  action="store", type="string", dest="db",
                  help="Database file to read. Default: None")

    op.add_option("--cfg",
                  action="store", type="string", dest="cfg",
                  help="Repository config file.")

    op.add_option("-o", "--outdir",
                  action="store", type="string", dest="outdir",
                  help="Target directory for ReST output. Default: None")

    op.add_option("-r", "--release-url",
                  action="append", dest="release_urls",

    op.add_option("--pkgaddenum", action="store", dest="addenum_dir",
                  type="string", default=None, help="None")

    op.add_option("--extracts", action="store", dest="extracts_dir",
                  type="string", default=None, help="None")
    op = OptionParser(version="%prog 0.0.2")

    (opts, args) = op.parse_args()

        print('There needs to be exactly one command')

        print("'--cfg' option is mandatory.")

        print("'--db' option is mandatory.")

    cfg = SafeConfigParser()

    # load existing db, unless renew is requested
    if cmd == 'updatedb':

        if cfg.has_option('packages', 'select taskfiles'):
            db = add_pkgfromtaskfile(db, cfg.get('packages',
                                                 'select taskfiles').split())

        # add additional package names from config file
        if cfg.has_option('packages', 'select names'):
            for p in cfg.get('packages', 'select names').split():
                if not db.has_key(p):
                    db[p] = get_emptydbentry()

        # get info from task files
        if cfg.has_option('packages', 'prospective'):
            for url in cfg.get('packages', 'prospective').split():
                db = import_blendstask(cfg, db, url)

        # parse NeuroDebian repository
        if cfg.has_option('neurodebian', 'releases'):
            for rurl in cfg.get('neurodebian', 'releases').split():
                db = import_release(cfg, db, rurl)

        # collect package information from DDE
        db = import_dde(cfg, db)
        # get info from NITRC
        db = import_nitrc(cfg, db)

        store_db(db, opts.db)

    # load the db from file
    db = read_db(opts.db)

    jinja_env = Environment(loader=PackageLoader('neurodebian', 'templates'))

    # generate package pages and TOC and write them to files
    write_pkgpages(jinja_env, cfg, db, opts.outdir, opts.addenum_dir, opts.extracts_dir)

    write_sourceslist(jinja_env, cfg, opts.outdir)
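# Typical invocation for rebuilding the package database (script, config, and
# db file names are illustrative; other command names besides 'updatedb' are
# not shown in this excerpt):
#
#   python thisscript.py --cfg repository.cfg --db pkgs.db updatedb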
if __name__ == "__main__":