sphinx/sphinxext/feed/__init__.py

   1 # -*- coding: utf-8 -*-
   2 from fsdict import FSDict
   3 import feedgenerator
   4 from urllib import quote_plus
   5 import os.path
   6 import re
   7
   8 # Module-level global: stash of feed items; starts unset and is bound to an FSDict by create_feed_container()
   9 feed_entries = None
  10
  11 # Constant unlikely to occur in a docname and legal in a filename; nice_name() joins the date and docname with it
  12 MAGIC_SEPARATOR = '---###---'
  13
  14 def parse_date(datestring):
  15     try:
  16         parser = parse_date.parser
  17     except AttributeError:
  18         import dateutil.parser
  19         parser = dateutil.parser.parser()
  20         parse_date.parser = parser
  21     return parser.parse(datestring)
  22
  23 def setup(app):
  24     """
  25     see: http://sphinx.pocoo.org/ext/appapi.html
  26     this is the primary extension point for Sphinx
  27     """
  28     from sphinx.application import Sphinx
  29     if not isinstance(app, Sphinx): return
  30     app.add_config_value('feed_base_url', '', 'html')
  31     app.add_config_value('feed_description', '', 'html')
  32     app.add_config_value('feed_filename', 'rss.xml', 'html')
  33     app.add_config_value('feed_title', '', 'html')
  34     app.add_config_value('feed_subtitle', '', 'html')
  35     app.add_config_value('feed_author_name', '', 'html')
  36     app.add_config_value('feed_author_email', '', 'html')
  37     app.add_config_value('feed_categories', [], 'html')
  38     app.add_config_value('feed_variants',
  39                          {'all': {'filename': 'rss.xml', 'categories': None}},
  40                          'html')
  41     app.connect('html-page-context', create_feed_item)
  42     app.connect('build-finished', emit_feed)
  43     app.connect('builder-inited', create_feed_container)
  44     app.connect('env-purge-doc', remove_dead_feed_item)
  45
  46 def create_feed_container(app):
  47     """
  48     create lazy filesystem stash for keeping RSS entry fragments, since we
  49     don't want to store the entire site in the environment (in fact, even if
  50     we did, it wasn't persisting for some reason.)
  51     """
  52     global feed_entries
  53     rss_fragment_path = os.path.realpath(os.path.join(app.outdir, '..', 'rss_entry_fragments'))
  54     feed_entries = FSDict(work_dir=rss_fragment_path)
  55     app.builder.env.feed_url = app.config.feed_base_url + '/' + \
  56         app.config.feed_filename
  57
  58 def inject_feed_url(app, pagename, templatename, ctx, doctree):
  59     #We like to provide our templates with a way to link to the rss output file
  60     ctx['rss_link'] = app.builder.env.feed_url #app.config.feed_base_url + '/' + app.config.feed_filename
  61
  62
  63 def create_feed_item(app, pagename, templatename, ctx, doctree):
  64     """
  65     Here we have access to nice HTML fragments to use in, say, an RSS feed.
  66     We serialize them to disk so that we get them preserved across builds.
  67
  68     We also inject useful metadata into the context here.
  69     """
  70     global feed_entries
  71     from absolutify_urls import absolutify
  72     metadata = app.builder.env.metadata.get(pagename, {})
  73
  74     if 'date' not in metadata:
  75         return #don't index dateless articles
  76     try:
  77         pub_date = parse_date(metadata['date'])
  78         app.builder.env.metadata.get(pagename, {})
  79     except ValueError, exc:
  80         #probably a nonsensical date
  81         app.builder.warn('date parse error: ' + str(exc) + ' in ' + pagename)
  82         return
  83
  84     # RSS item attributes, w/defaults:
  85     #     title, link, description, author_email=None,
  86     #     author_name=None, author_link=None, pubdate=None, comments=None,
  87     #     unique_id=None, enclosure=None, categories=(), item_copyright=None,
  88     #     ttl=None,
  89     link = app.config.feed_base_url + '/' + ctx['current_page_name'] + ctx['file_suffix']
  90     # bring main body of the feed item into shape
  91     body = ctx.get('body')
  92     # remove all header links (they make ugly characters in feed readers)
  93     body = re.sub('\<a class\="headerlink".*\>.</a\>', '', body)
  94
  95     item = {
  96       'title': ctx.get('title'),
  97       'link': link,
  98       'unique_id': link,
  99       'description': absolutify(body, link),
 100       'pubdate': pub_date,
 101       'categories': ()
 102     }
 103     if 'tags' in metadata:
 104         item['categories'] = metadata['tags'].split(",")
 105     if 'author' in metadata:
 106         item['author_name'] = metadata['author']
 107     else:
 108         item['author_name'] = app.config.feed_author_name
 109     if 'author_email' in metadata:
 110         item['author_email'] = metadata['author_email']
 111     else:
 112         item['author_email'] = app.config.feed_author_email
 113     feed_entries[nice_name(pagename, pub_date)] = item
 114
 115     #Now, useful variables to keep in context
 116     ctx['rss_link'] = app.builder.env.feed_url
 117     ctx['pub_date'] = pub_date
 118
 119 def remove_dead_feed_item(app, env, docname):
 120     """
 121     TODO:
 122     purge unwanted crap
 123     """
 124     global feed_entries
 125     munged_name = ''.join([MAGIC_SEPARATOR,quote_plus(docname)])
 126     for name in feed_entries:
 127         if name.endswith(munged_name):
 128             del(feed_entries[name])
 129
 130 def emit_feed(app, exc):
 131     global feed_entries
 132     import os.path
 133
 134     title = app.config.feed_title
 135     if not title:
 136         title = app.config.project
 137
 138     feed_dict = {
 139       'title': title,
 140       'subtitle': app.config.feed_subtitle,
 141       'link': app.config.feed_base_url,
 142       'feed_url': app.config.feed_base_url,
 143       'description': app.config.feed_description,
 144       'categories': app.config.feed_categories,
 145       'author_name': app.config.feed_author_name,
 146       'author_email': app.config.feed_author_email
 147     }
 148     if app.config.language:
 149         feed_dict['language'] = app.config.language
 150     if app.config.copyright:
 151         feed_dict['feed_copyright'] = app.config.copyright
 152     # sort items
 153     ordered_keys = feed_entries.keys()
 154     ordered_keys.sort(reverse=True)
 155     # loop over all feed variants
 156     for feedvar in app.config.feed_variants:
 157         feedvar_settings = app.config.feed_variants[feedvar]
 158         feed = feedgenerator.Rss201rev2Feed(**feed_dict)
 159         app.builder.env.feed_feed = feed
 160         for key in ordered_keys:
 161             item = feed_entries[key]
 162             # only take the ones that should be in this feed
 163             if feedvar_settings['tag'] is None \
 164                     or feedvar_settings['tag'] in item['categories']:
 165                 feed.add_item(**feed_entries[key])
 166         outfilename = os.path.join(app.builder.outdir,
 167           feedvar_settings['filename'])
 168         # make sure the directory exists
 169         feed_dir = os.path.dirname(outfilename)
 170         if feed_dir and not os.path.exists(feed_dir):
 171             os.makedirs(os.path.dirname(outfilename))
 172         fp = open(outfilename, 'w')
 173         feed.write(fp, 'utf-8')
 174         fp.close()
 175
 176 def nice_name(docname, date):
 177     """
 178     we need convenient filenames which incorporate dates for ease of sorting and
 179     guid for uniqueness, plus will work in the FS without inconvenient
 180     characters. NB, at the moment, hour of publication is ignored.
 181     """
 182     return quote_plus(MAGIC_SEPARATOR.join([date.isoformat(), docname]))