# From: Michael Hanke
# Date: Fri, 18 Mar 2011 19:14:20 +0000 (-0400)
# Subject: Add RSS feed extension for sphinx.
# X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=c3873b11733e725bf5fdda4af383e73f981aac62;p=neurodebian.git
#
# Add RSS feed extension for sphinx.
#
# Taken verbatim from
# https://bitbucket.org/birkenfeld/sphinx-contrib/src/813d6d58358d/feed/
#
# ---- sphinx/conf.py (index 1e8818d..5ec8c0f) ----
# @@ -44,6 +44,7 @@ def artworkdir():
#  #extensions = []
#  sys.path.append(os.path.abspath('.'))
#  extensions = ['sphinxext.quote',
# +              'sphinxext.feed',
#                'sphinx.ext.todo']
#
#  # show todo items
# @@ -209,3 +210,9 @@ latex_documents = [
#
#  # If false, no module index is generated.
#  #latex_use_modindex = True
# +
# +# RSS feed
# +# --------
# +feed_base_url = 'http://neuro.debian.net/blog'
# +feed_description = "Debian for neuroscience and neuroscience in Debian"
# +feed_filename = 'rss.xml'

# ---- sphinx/sphinxext/feed/__init__.py (new file, index 0000000..11a6004) ----
from fsdict import FSDict
import feedgenerator
from urllib import quote_plus
import os.path

#global
feed_entries = None

#constant unlikely to occur in a docname and legal as a filename
MAGIC_SEPARATOR = '---###---'


def setup(app):
    """
    see: http://sphinx.pocoo.org/ext/appapi.html
    this is the primary extension point for Sphinx

    Registers the three ``feed_*`` config values and hooks the feed
    callbacks of this module onto the Sphinx build events.
    """
    from sphinx.application import Sphinx
    if not isinstance(app, Sphinx):
        return
    app.add_config_value('feed_base_url', '', 'html')
    app.add_config_value('feed_description', '', 'html')
    app.add_config_value('feed_filename', 'rss.xml', 'html')

    app.connect('html-page-context', create_feed_item)
    app.connect('html-page-context', inject_feed_url)
    app.connect('build-finished', emit_feed)
    app.connect('builder-inited', create_feed_container)
    app.connect('env-purge-doc', remove_dead_feed_item)


def create_feed_container(app):
    """
    create lazy filesystem stash for keeping RSS entry fragments, since we don't
    want to store the entire site in the environment (in fact, even if we did,
    it wasn't persisting for some reason.)

    The stash lives in ``rss_entry_fragments`` next to the output directory;
    the absolute feed URL is remembered on the build environment.
    """
    global feed_entries
    rss_fragment_path = os.path.realpath(
        os.path.join(app.outdir, '..', 'rss_entry_fragments'))
    feed_entries = FSDict(work_dir=rss_fragment_path)
    app.builder.env.feed_url = app.config.feed_base_url + '/' + \
        app.config.feed_filename


def inject_feed_url(app, pagename, templatename, ctx, doctree):
    """Expose the feed URL to the HTML templates as ``rss_link``."""
    #We like to provide our templates with a way to link to the rss output file
    ctx['rss_link'] = app.builder.env.feed_url


def create_feed_item(app, pagename, templatename, ctx, doctree):
    """
    Here we have access to nice HTML fragments to use in, say, an RSS feed.
    We serialize them to disk so that we get them preserved across builds.

    Pages without a ``date`` metadata field, or whose date cannot be parsed,
    are not added to the feed.
    """
    import dateutil.parser
    from absolutify_urls import absolutify
    date_parser = dateutil.parser.parser()
    metadata = app.builder.env.metadata.get(pagename, {})

    if 'date' not in metadata:
        return #don't index dateless articles
    try:
        pub_date = date_parser.parse(metadata['date'])
    except ValueError as exc:
        #probably a nonsensical date
        app.builder.warn('date parse error: ' + str(exc) + ' in ' + pagename)
        return

    # The keys below are keyword arguments of SyndicationFeed.add_item():
    # title, link, description, author_email=None,
    # author_name=None, author_link=None, pubdate=None, comments=None,
    # unique_id=None, enclosure=None, categories=(), item_copyright=None,
    # ttl=None,
    link = app.config.feed_base_url + '/' + ctx['current_page_name'] + ctx['file_suffix']
    item = {
        'title': ctx.get('title'),
        'link': link,
        'unique_id': link,
        'description': absolutify(ctx.get('body'), link),
        'pubdate': pub_date
    }
    if 'author' in metadata:
        # add_item() only looks at 'author_name'; a plain 'author' key would
        # end up in **kwargs and never be written to the feed.
        item['author_name'] = metadata['author']
    feed_entries[nice_name(pagename, pub_date)] = item


def remove_dead_feed_item(app, env, docname):
    """
    Drop the stored feed fragments of a document that is being purged.

    Fragment names are produced by nice_name(), i.e. the *whole* name
    including the separator is passed through quote_plus().  The suffix we
    search for must therefore be quoted in the same way, otherwise it never
    matches.
    """
    if feed_entries is None:
        return
    munged_name = quote_plus(MAGIC_SEPARATOR + docname)
    # keys() returns a fresh list, so deleting while looping is safe.
    for name in feed_entries.keys():
        if name.endswith(munged_name):
            del feed_entries[name]


def emit_feed(app, exc):
    """
    Write all stored feed items, newest first, to ``feed_filename`` inside
    the builder's output directory.  Connected to ``build-finished``.
    """
    if feed_entries is None:
        # builder-inited never ran; there is nothing to emit
        return

    feed_dict = {
        'title': app.config.project,
        'link': app.config.feed_base_url,
        # the URL of the feed itself (used for Atom's rel="self" link)
        'feed_url': app.config.feed_base_url + '/' + app.config.feed_filename,
        'description': app.config.feed_description
    }
    if app.config.language:
        feed_dict['language'] = app.config.language
    if app.config.copyright:
        feed_dict['feed_copyright'] = app.config.copyright
    feed = feedgenerator.Rss201rev2Feed(**feed_dict)
    app.builder.env.feed_feed = feed
    # names start with the ISO date, so reverse lexical order is newest first
    for key in sorted(feed_entries.keys(), reverse=True):
        feed.add_item(**feed_entries[key])
    outfilename = os.path.join(app.builder.outdir,
                               app.config.feed_filename)
    fp = open(outfilename, 'w')
    try:
        feed.write(fp, 'utf-8')
    finally:
        fp.close()


def nice_name(docname, date):
    """
    we need convenient filenames which incorporate dates for ease of sorting and
    guid for uniqueness, plus will work in the FS without inconvenient
    characters. NB, at the moment, hour of publication is ignored.
    """
    return quote_plus(MAGIC_SEPARATOR.join([date.isoformat(), docname]))


# ---- sphinx/sphinxext/feed/absolutify_urls.py (new file, index 0000000..e892909) ----
# By Gareth Rees
# http://gareth-rees.livejournal.com/27148.html

import html5lib
import html5lib.serializer
import html5lib.treewalkers
import urlparse

# List of (ELEMENT, ATTRIBUTE) for HTML5 attributes which contain URLs.
# Based on the list at http://www.feedparser.org/docs/resolving-relative-links.html
url_attributes = [
    ('a', 'href'),
    ('applet', 'codebase'),
    ('area', 'href'),
    ('blockquote', 'cite'),
    ('body', 'background'),
    ('del', 'cite'),
    ('form', 'action'),
    ('frame', 'longdesc'),
    ('frame', 'src'),
    ('iframe', 'longdesc'),
    ('iframe', 'src'),
    ('head', 'profile'),
    ('img', 'longdesc'),
    ('img', 'src'),
    ('img', 'usemap'),
    ('input', 'src'),
    ('input', 'usemap'),
    ('ins', 'cite'),
    ('link', 'href'),
    ('object', 'classid'),
    ('object', 'codebase'),
    ('object', 'data'),
    ('object', 'usemap'),
    ('q', 'cite'),
    ('script', 'src')]

def absolutify(src, base_url):
    """absolutify(SRC, BASE_URL): Resolve relative URLs in SRC.
SRC is a string containing HTML. All URLs in SRC are resolved relative
to BASE_URL. Return the body of the result as HTML."""

    # Parse SRC as HTML.
    tree_builder = html5lib.treebuilders.getTreeBuilder('dom')
    parser = html5lib.html5parser.HTMLParser(tree = tree_builder)
    dom = parser.parse(src)

    # Handle a "base" element in the document head, if any: its href
    # replaces BASE_URL for everything that follows.
    head = dom.getElementsByTagName('head')[0]
    for b in head.getElementsByTagName('base'):
        u = b.getAttribute('href')
        if u:
            base_url = urlparse.urljoin(base_url, u)
            # HTML5 4.2.3 "if there are multiple base elements with href
            # attributes, all but the first are ignored."
            break

    # Change all relative URLs to absolute URLs by resolving them
    # relative to BASE_URL. Note that we need to do this even for URLs
    # that consist only of a fragment identifier, because Google Reader
    # changes href=#foo to href=http://site/#foo
    for tag, attr in url_attributes:
        for e in dom.getElementsByTagName(tag):
            u = e.getAttribute(attr)
            if u:
                e.setAttribute(attr, urlparse.urljoin(base_url, u))

    # Return the HTML5 serialization of the body of the result only (we
    # don't want the document head: this breaks feed readers).
    body = dom.getElementsByTagName('body')[0]
    tree_walker = html5lib.treewalkers.getTreeWalker('dom')
    html_serializer = html5lib.serializer.htmlserializer.HTMLSerializer()
    return u''.join(html_serializer.serialize(tree_walker(body)))


# Alternative option, from http://stackoverflow.com/questions/589833/how-to-find-a-relative-url-and-translate-it-to-an-absolute-url-in-python/589939#589939
#
# import re, urlparse
#
# find_re = re.compile(r'\bhref\s*=\s*("[^"]*"|\'[^\']*\'|[^"\'<>=\s]+)')
#
# def fix_urls(document, base_url):
#     ret = []
#     last_end = 0
#     for match in find_re.finditer(document):
#         url = match.group(1)
#         if url[0] in "\"'":
#             url = url.strip(url[0])
#         parsed = urlparse.urlparse(url)
#         if parsed.scheme == parsed.netloc == '': #relative to domain
#             url = urlparse.urljoin(base_url, url)
#             ret.append(document[last_end:match.start(1)])
#             ret.append('"%s"' % (url,))
#             last_end = match.end(1)
#     ret.append(document[last_end:])
#     return ''.join(ret)

# ---- sphinx/sphinxext/feed/django_support.py (new file, index 0000000..1505004) ----
"""
utils needed for django's feed generator
"""

"""
Utilities for XML generation/parsing.
from django.utils.xmlutils import SimplerXMLGenerator
"""

from xml.sax.saxutils import XMLGenerator

class SimplerXMLGenerator(XMLGenerator):
    # XMLGenerator with a one-call helper for leaf elements.
    def addQuickElement(self, name, contents=None, attrs=None):
        "Convenience method for adding an element with no children"
        if attrs is None: attrs = {}
        self.startElement(name, attrs)
        if contents is not None:
            self.characters(contents)
        self.endElement(name)

"""
from django.utils.encoding import force_unicode, iri_to_uri
"""
import types
import urllib
import locale
import datetime
import codecs
from decimal import Decimal

class DjangoUnicodeDecodeError(UnicodeDecodeError):
    # UnicodeDecodeError that also remembers (and reports) the offending object.
    def __init__(self, obj, *args):
        self.obj = obj
        UnicodeDecodeError.__init__(self, *args)

    def __str__(self):
        original = UnicodeDecodeError.__str__(self)
        return '%s. You passed in %r (%s)' % (original, self.obj,
                type(self.obj))

class StrAndUnicode(object):
    """
    A class whose __str__ returns its __unicode__ as a UTF-8 bytestring.

    Useful as a mix-in.
    """
    def __str__(self):
        return self.__unicode__().encode('utf-8')

def is_protected_type(obj):
    """Determine if the object instance is of a protected type.

    Objects of protected types are preserved as-is when passed to
    force_unicode(strings_only=True).
    """
    return isinstance(obj, (
        types.NoneType,
        int, long,
        datetime.datetime, datetime.date, datetime.time,
        float, Decimal)
    )

def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Similar to smart_unicode, except that lazy instances are resolved to
    strings, rather than kept as lazy objects.

    If strings_only is True, don't convert (some) non-string-like objects.

    Raises DjangoUnicodeDecodeError when a non-Exception bytestring cannot
    be decoded with the given encoding.
    """
    if strings_only and is_protected_type(s):
        return s
    try:
        if not isinstance(s, basestring,):
            if hasattr(s, '__unicode__'):
                s = unicode(s)
            else:
                try:
                    s = unicode(str(s), encoding, errors)
                except UnicodeEncodeError:
                    if not isinstance(s, Exception):
                        raise
                    # If we get to here, the caller has passed in an Exception
                    # subclass populated with non-ASCII data without special
                    # handling to display as a string. We need to handle this
                    # without raising a further exception. We do an
                    # approximation to what the Exception's standard str()
                    # output should be.
                    s = ' '.join([force_unicode(arg, encoding, strings_only,
                            errors) for arg in s])
        elif not isinstance(s, unicode):
            # Note: We use .decode() here, instead of unicode(s, encoding,
            # errors), so that if s is a SafeString, it ends up being a
            # SafeUnicode at the end.
            s = s.decode(encoding, errors)
    except UnicodeDecodeError, e:
        if not isinstance(s, Exception):
            raise DjangoUnicodeDecodeError(s, *e.args)
        else:
            # If we get to here, the caller has passed in an Exception
            # subclass populated with non-ASCII bytestring data without a
            # working unicode method. Try to handle this without raising a
            # further exception by individually forcing the exception args
            # to unicode.
            s = ' '.join([force_unicode(arg, encoding, strings_only,
                    errors) for arg in s])
    return s

def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Returns a bytestring version of 's', encoded as specified in 'encoding'.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if strings_only and isinstance(s, (types.NoneType, int)):
        return s
    elif not isinstance(s, basestring):
        try:
            return str(s)
        except UnicodeEncodeError:
            if isinstance(s, Exception):
                # An Exception subclass containing non-ASCII data that doesn't
                # know how to print itself properly. We shouldn't raise a
                # further exception.
                return ' '.join([smart_str(arg, encoding, strings_only,
                        errors) for arg in s])
            return unicode(s).encode(encoding, errors)
    elif isinstance(s, unicode):
        return s.encode(encoding, errors)
    elif s and encoding != 'utf-8':
        # Non-empty bytestring: it is taken to be UTF-8 and re-encoded.
        return s.decode('utf-8', errors).encode(encoding, errors)
    else:
        return s

def iri_to_uri(iri):
    """
    Convert an Internationalized Resource Identifier (IRI) portion to a URI
    portion that is suitable for inclusion in a URL.

    This is the algorithm from section 3.1 of RFC 3987.  However, since we are
    assuming input is either UTF-8 or unicode already, we can simplify things a
    little from the full method.

    Returns an ASCII string containing the encoded result.
    """
    # The list of safe characters here is constructed from the "reserved" and
    # "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
    #     reserved    = gen-delims / sub-delims
    #     gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    #     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
    #                   / "*" / "+" / "," / ";" / "="
    #     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
    # Of the unreserved characters, urllib.quote already considers all but
    # the ~ safe.
    # The % character is also added to the list of safe characters here, as the
    # end of section 3.1 of RFC 3987 specifically mentions that % must not be
    # converted.
    if iri is None:
        return iri
    return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")


# The encoding of the default system locale but falls back to the
# given fallback encoding if the encoding is unsupported by python or could
# not be determined.
# See tickets #10335 and #5846
try:
    DEFAULT_LOCALE_ENCODING = locale.getdefaultlocale()[1] or 'ascii'
    codecs.lookup(DEFAULT_LOCALE_ENCODING)
except Exception:
    # Unknown or undeterminable locale encoding: fall back to ASCII.  (Not a
    # bare ``except`` so that KeyboardInterrupt/SystemExit still propagate.)
    DEFAULT_LOCALE_ENCODING = 'ascii'


# ---- sphinx/sphinxext/feed/feedgenerator.py (new file, index 0000000..de14b62) ----
"""
Syndication feed generation library -- used for generating RSS, etc.
Included from django http://djangoproject.org/

Sample usage:

>>> from django.utils import feedgenerator
>>> feed = feedgenerator.Rss201rev2Feed(
...     title=u"Poynter E-Media Tidbits",
...     link=u"http://www.poynter.org/column.asp?id=31",
...     description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
...     language=u"en",
... )
>>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.")
>>> fp = open('test.rss', 'w')
>>> feed.write(fp, 'utf-8')
>>> fp.close()

For definitions of the different versions of RSS, see:
http://diveintomark.org/archives/2004/02/04/incompatible-rss
"""

import re
import datetime
from django_support import SimplerXMLGenerator, iri_to_uri, force_unicode


def _utc_offset_parts(date):
    """
    Return ``(sign, hours, minutes)`` for the UTC offset of *date*, or None
    when *date* has no usable offset.

    The sign is split off before the divmod: divmod(-210, 60) is (-4, 30),
    which would render -03:30 as "-0430".
    """
    offset = date.utcoffset()
    if offset is None:
        return None
    total_minutes = offset.days * 24 * 60 + offset.seconds // 60
    if total_minutes < 0:
        sign = '-'
    else:
        sign = '+'
    hours, minutes = divmod(abs(total_minutes), 60)
    return sign, hours, minutes


def rfc2822_date(date):
    """Format a datetime as an RFC 2822 date; naive datetimes get "-0000"."""
    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    parts = _utc_offset_parts(date)
    if parts is None:
        return date.strftime('%a, %d %b %Y %H:%M:%S -0000')
    time_str = date.strftime('%a, %d %b %Y %H:%M:%S ')
    return time_str + "%s%02d%02d" % parts


def rfc3339_date(date):
    """Format a datetime as an RFC 3339 timestamp; naive datetimes get "Z"."""
    parts = _utc_offset_parts(date)
    if parts is None:
        return date.strftime('%Y-%m-%dT%H:%M:%SZ')
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')
    return time_str + "%s%02d:%02d" % parts


def get_tag_uri(url, date):
    "Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id"
    tag = re.sub('^http://', '', url)
    if date is not None:
        # only the first "/" (end of the host part) receives the date
        tag = re.sub('/', ',%s:/' % date.strftime('%Y-%m-%d'), tag, 1)
    tag = re.sub('#', '/', tag)
    return u'tag:' + tag


class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"
    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Store the feed-level metadata in ``self.feed``.  Text values are
        forced to unicode, links are converted with iri_to_uri(), and any
        extra keyword arguments are stored in ``self.feed`` unchanged.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [force_unicode(c) for c in categories]
        self.feed = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'language': to_unicode(language),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': to_unicode(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': to_unicode(feed_copyright),
            'id': feed_guid or link,
            'ttl': ttl,
        }
        self.feed.update(kwargs)
        self.items = []

    def add_item(self, title, link, description, author_email=None,
        author_name=None, author_link=None, pubdate=None, comments=None,
        unique_id=None, enclosure=None, categories=(), item_copyright=None,
        ttl=None, **kwargs):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except pubdate, which is a datetime.datetime object, and
        enclosure, which is an instance of the Enclosure class.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [to_unicode(c) for c in categories]
        item = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': to_unicode(comments),
            'unique_id': to_unicode(unique_id),
            'enclosure': enclosure,
            'categories': categories or (),
            'item_copyright': to_unicode(item_copyright),
            'ttl': ttl,
        }
        item.update(kwargs)
        self.items.append(item)

    def num_items(self):
        "Returns the number of items added so far."
        return len(self.items)

    def root_attributes(self):
        """
        Return extra attributes to place on the root (i.e. feed/channel) element.
        Called from write().
        """
        return {}

    def add_root_elements(self, handler):
        """
        Add elements in the root (i.e. feed/channel) element. Called
        from write().
        """
        pass

    def item_attributes(self, item):
        """
        Return extra attributes to place on each item (i.e. item/entry) element.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Add elements on each item (i.e. item/entry) element.
        """
        pass

    def write(self, outfile, encoding):
        """
        Outputs the feed in the given encoding to outfile, which is a file-like
        object. Subclasses should override this.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Returns the feed in the given encoding as a string.
        """
        from StringIO import StringIO
        s = StringIO()
        self.write(s, encoding)
        return s.getvalue()

    def latest_post_date(self):
        """
        Returns the latest item's pubdate. If none of them have a pubdate,
        this returns the current date/time.
        """
        updates = [i['pubdate'] for i in self.items if i['pubdate'] is not None]
        if updates:
            return max(updates)
        return datetime.datetime.now()


class Enclosure(object):
    "Represents an RSS enclosure"
    def __init__(self, url, length, mime_type):
        "All args are expected to be Python Unicode objects"
        self.length, self.mime_type = length, mime_type
        self.url = iri_to_uri(url)


class RssFeed(SyndicationFeed):
    "Common RSS writer; subclasses provide _version and add_item_elements()."
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        "Writes the complete rss/channel document to outfile."
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u"rss", self.rss_attributes())
        handler.startElement(u"channel", self.root_attributes())
        self.add_root_elements(handler)
        self.write_items(handler)
        self.endChannelElement(handler)
        handler.endElement(u"rss")

    def rss_attributes(self):
        return {u"version": self._version}

    def write_items(self, handler):
        for item in self.items:
            handler.startElement(u'item', self.item_attributes(item))
            self.add_item_elements(handler, item)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        handler.addQuickElement(u"title", self.feed['title'])
        handler.addQuickElement(u"link", self.feed['link'])
        handler.addQuickElement(u"description", self.feed['description'])
        if self.feed['language'] is not None:
            handler.addQuickElement(u"language", self.feed['language'])
        for cat in self.feed['categories']:
            handler.addQuickElement(u"category", cat)
        if self.feed['feed_copyright'] is not None:
            handler.addQuickElement(u"copyright", self.feed['feed_copyright'])
        handler.addQuickElement(u"lastBuildDate", rfc2822_date(self.latest_post_date()).decode('utf-8'))
        if self.feed['ttl'] is not None:
            handler.addQuickElement(u"ttl", self.feed['ttl'])

    def endChannelElement(self, handler):
        handler.endElement(u"channel")


class RssUserland091Feed(RssFeed):
    "RSS 0.91: items carry only title, link and description."
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", item['link'])
        if item['description'] is not None:
            handler.addQuickElement(u"description", item['description'])


class Rss201rev2Feed(RssFeed):
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", item['link'])
        if item['description'] is not None:
            handler.addQuickElement(u"description", item['description'])

        # Author information.  The author element needs an e-mail address;
        # with only a name we fall back to dc:creator.
        if item["author_name"] and item["author_email"]:
            handler.addQuickElement(u"author", "%s (%s)" % \
                (item['author_email'], item['author_name']))
        elif item["author_email"]:
            handler.addQuickElement(u"author", item["author_email"])
        elif item["author_name"]:
            handler.addQuickElement(u"dc:creator", item["author_name"], {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        if item['pubdate'] is not None:
            handler.addQuickElement(u"pubDate", rfc2822_date(item['pubdate']).decode('utf-8'))
        if item['comments'] is not None:
            handler.addQuickElement(u"comments", item['comments'])
        if item['unique_id'] is not None:
            handler.addQuickElement(u"guid", item['unique_id'])
        if item['ttl'] is not None:
            handler.addQuickElement(u"ttl", item['ttl'])

        # Enclosure.
        if item['enclosure'] is not None:
            handler.addQuickElement(u"enclosure", '',
                {u"url": item['enclosure'].url, u"length": item['enclosure'].length,
                    u"type": item['enclosure'].mime_type})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", cat)


class Atom1Feed(SyndicationFeed):
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        "Writes the complete Atom feed document to outfile."
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u'feed', self.root_attributes())
        self.add_root_elements(handler)
        self.write_items(handler)
        handler.endElement(u"feed")

    def root_attributes(self):
        if self.feed['language'] is not None:
            return {u"xmlns": self.ns, u"xml:lang": self.feed['language']}
        else:
            return {u"xmlns": self.ns}

    def add_root_elements(self, handler):
        handler.addQuickElement(u"title", self.feed['title'])
        handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": self.feed['link']})
        if self.feed['feed_url'] is not None:
            handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": self.feed['feed_url']})
        handler.addQuickElement(u"id", self.feed['id'])
        handler.addQuickElement(u"updated", rfc3339_date(self.latest_post_date()).decode('utf-8'))
        if self.feed['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", self.feed['author_name'])
            if self.feed['author_email'] is not None:
                handler.addQuickElement(u"email", self.feed['author_email'])
            if self.feed['author_link'] is not None:
                handler.addQuickElement(u"uri", self.feed['author_link'])
            handler.endElement(u"author")
        if self.feed['subtitle'] is not None:
            handler.addQuickElement(u"subtitle", self.feed['subtitle'])
        for cat in self.feed['categories']:
            handler.addQuickElement(u"category", "", {u"term": cat})
        if self.feed['feed_copyright'] is not None:
            handler.addQuickElement(u"rights", self.feed['feed_copyright'])

    def write_items(self, handler):
        for item in self.items:
            handler.startElement(u"entry", self.item_attributes(item))
            self.add_item_elements(handler, item)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        if item['pubdate'] is not None:
            handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", item['author_name'])
            if item['author_email'] is not None:
                handler.addQuickElement(u"email", item['author_email'])
            if item['author_link'] is not None:
                handler.addQuickElement(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID.
        if item['unique_id'] is not None:
            unique_id = item['unique_id']
        else:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        if item['description'] is not None:
            handler.addQuickElement(u"summary", item['description'], {u"type": u"html"})

        # Enclosure.
        if item['enclosure'] is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"enclosure",
                 u"href": item['enclosure'].url,
                 u"length": item['enclosure'].length,
                 u"type": item['enclosure'].mime_type})

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": cat})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement(u"rights", item['item_copyright'])

# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
DefaultFeed = Rss201rev2Feed

# ---- sphinx/sphinxext/feed/fsdict.py (new file, index 0000000..04c9b84) ----
# −*− coding: UTF−8 −*−
from path import path
import errno
import os
import pickle
"""
A class providing dictionary access to a folder.
cribbed from http://bitbucket.org/howthebodyworks/fsdict
"""

def get_tmp_dir():
    """Create a fresh temporary directory and return its name."""
    import tempfile
    return tempfile.mkdtemp()

class FSDict(dict):
    """
    provide dictionary access to a temp dir. I don't know why i didn't just use
    shelve. I think I forgot it existed.

    Every key is a file name inside ``work_dir`` and every value is stored
    pickled in that file.

    N.B. the keys ordering here is FS-dependent and thus unlike to be the same as
    with a real dict. beware.
    """

    # every work_dir ever handed out, so cleanup_all() can remove them
    unclean_dirs = []

    def __init__(self, initval=(), work_dir=None, *args, **kwargs):
        """
        initval  -- mapping or iterable of (key, value) pairs to store
        work_dir -- directory backing the dict; a temp dir when omitted
        """
        if work_dir is None:
            work_dir = get_tmp_dir()
        self.work_dir = path(work_dir)
        if not self.work_dir.exists():
            self.work_dir.mkdir()
        for key, val in getattr(initval, 'iteritems', initval.__iter__)():
            self[key] = val
        self.unclean_dirs.append(self.work_dir)
        super(FSDict, self).__init__(*args, **kwargs)

    def __setitem__(self, key, val, *args, **kwargs):
        stream = open(self.work_dir/key, 'w')
        try:
            pickle.dump(val, stream)
        finally:
            stream.close()

    def __getitem__(self, key, *args, **kwargs):
        # NOTE: unpickling executes whatever the file describes; work_dir
        # must only ever contain files written by this class.
        try:
            stream = open(self.work_dir/key, 'r')
        except IOError as err:
            if err.errno == errno.ENOENT:
                # behave like a dict for missing keys (__eq__ relies on it)
                raise KeyError(key)
            raise
        try:
            return pickle.load(stream)
        finally:
            stream.close()

    def __delitem__(self, key, *args, **kwargs):
        try:
            (self.work_dir/key).unlink()
        except OSError as err:
            # assumes path.unlink() raises OSError like os.unlink() does
            if err.errno == errno.ENOENT:
                raise KeyError(key)
            raise

    def __contains__(self, key):
        return os.path.isfile(self.work_dir/key)

    def __iter__(self):
        # iterate the files, not the (always empty) in-memory dict
        return self.iterkeys()

    def __len__(self):
        return sum(1 for _ in self.iterkeys())

    def __repr__(self):
        """
        a hardline list of everything in the dict. may be long.
        """
        return repr(dict(self.iteritems()))

    def __str__(self):
        """
        str is truncated somewhat.
        """
        keys = self.keys()
        if keys:
            first = keys[0]
            return '{' + repr(first) + ':' + repr(self[first]) + ', ...}'
        else:
            return super(FSDict, self).__str__()

    def keys(self, *args, **kwargs):
        return list(self.iterkeys())

    def iterkeys(self, *args, **kwargs):
        for f in self.work_dir.files():
            yield str(self.work_dir.relpathto(f))

    def iteritems(self):
        for key in self.iterkeys():
            yield key, self[key]

    def itervalues(self):
        for key in self.iterkeys():
            yield self[key]

    def values(self, *args, **kwargs):
        return [self[key] for key in self.keys()]

    def cleanup(self):
        """Remove the backing directory and everything in it."""
        self.work_dir.rmtree()

    @classmethod
    def cleanup_all(cls):
        """Best-effort removal of every directory any FSDict has used."""
        for fsd in cls.unclean_dirs:
            try:
                fsd.rmtree()
            except OSError:
                pass

    def move(self, new_dir):
        """Move the backing directory to new_dir and keep using it there."""
        self.work_dir.move(new_dir)
        # keep work_dir a path object: "/" joins and .files() depend on it
        self.work_dir = path(new_dir)

    def __eq__(self, other):
        """
        when compared to a dict, equate equal if all keys and vals are equal
        note, this is potentially expensive.
        """
        #duck type our way to sanity:
        if not hasattr(other, 'keys'): return False
        #OK, it's a dict-ish thing
        if len(self.keys()) != len(other.keys()):
            return False
        try:
            return all([self[key] == other[key] for key in other])
        except KeyError:
            return False

    def __ne__(self, other):
        # Python 2 does not derive != from ==
        return not self.__eq__(other)

# ---- sphinx/sphinxext/feed/path.py (new file, index 0000000..3652963) ----
""" path.py - An object representing a path to a file or directory.

Example:

from path import path
d = path('/home/guido/bin')
for f in d.files('*.py'):
    f.chmod(0755)

This module requires Python 2.2 or later.


URL:     http://www.jorendorff.com/articles/python/path
Author:  Jason Orendorff (and others - see the url!)
Date:    9 Mar 2007
"""


# TODO
#   - Tree-walking functions don't avoid symlink loops.  Matt Harrison
#     sent me a patch for this.
#   - Bug in write_text().
It doesn't support Universal newline mode. +# - Better error message in listdir() when self isn't a +# directory. (On Windows, the error message really sucks.) +# - Make sure everything has a good docstring. +# - Add methods for regex find and replace. +# - guess_content_type() method? +# - Perhaps support arguments to touch(). + +from __future__ import generators + +import sys, warnings, os, fnmatch, glob, shutil, codecs, md5 + +__version__ = '2.2' +__all__ = ['path'] + +# Platform-specific support for path.owner +if os.name == 'nt': + try: + import win32security + except ImportError: + win32security = None +else: + try: + import pwd + except ImportError: + pwd = None + +# Pre-2.3 support. Are unicode filenames supported? +_base = str +_getcwd = os.getcwd +try: + if os.path.supports_unicode_filenames: + _base = unicode + _getcwd = os.getcwdu +except AttributeError: + pass + +# Pre-2.3 workaround for booleans +try: + True, False +except NameError: + True, False = 1, 0 + +# Pre-2.3 workaround for basestring. +try: + basestring +except NameError: + basestring = (str, unicode) + +# Universal newline support +_textmode = 'r' +if hasattr(file, 'newlines'): + _textmode = 'U' + + +class TreeWalkWarning(Warning): + pass + +class path(_base): + """ Represents a filesystem path. + + For documentation on individual methods, consult their + counterparts in os.path. + """ + + # --- Special Python methods. + + def __repr__(self): + return 'path(%s)' % _base.__repr__(self) + + # Adding a path and a string yields a path. + def __add__(self, more): + try: + resultStr = _base.__add__(self, more) + except TypeError: #Python bug + resultStr = NotImplemented + if resultStr is NotImplemented: + return resultStr + return self.__class__(resultStr) + + def __radd__(self, other): + if isinstance(other, basestring): + return self.__class__(other.__add__(self)) + else: + return NotImplemented + + # The / operator joins paths. 
+ def __div__(self, rel): + """ fp.__div__(rel) == fp / rel == fp.joinpath(rel) + + Join two path components, adding a separator character if + needed. + """ + return self.__class__(os.path.join(self, rel)) + + # Make the / operator work even when true division is enabled. + __truediv__ = __div__ + + def getcwd(cls): + """ Return the current working directory as a path object. """ + return cls(_getcwd()) + getcwd = classmethod(getcwd) + + + # --- Operations on path strings. + + isabs = os.path.isabs + def abspath(self): return self.__class__(os.path.abspath(self)) + def normcase(self): return self.__class__(os.path.normcase(self)) + def normpath(self): return self.__class__(os.path.normpath(self)) + def realpath(self): return self.__class__(os.path.realpath(self)) + def expanduser(self): return self.__class__(os.path.expanduser(self)) + def expandvars(self): return self.__class__(os.path.expandvars(self)) + def dirname(self): return self.__class__(os.path.dirname(self)) + basename = os.path.basename + + def expand(self): + """ Clean up a filename by calling expandvars(), + expanduser(), and normpath() on it. + + This is commonly everything needed to clean up a filename + read from a configuration file, for example. + """ + return self.expandvars().expanduser().normpath() + + def _get_namebase(self): + base, ext = os.path.splitext(self.name) + return base + + def _get_ext(self): + f, ext = os.path.splitext(_base(self)) + return ext + + def _get_drive(self): + drive, r = os.path.splitdrive(self) + return self.__class__(drive) + + parent = property( + dirname, None, None, + """ This path's parent directory, as a new path object. + + For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib') + """) + + name = property( + basename, None, None, + """ The name of this file or directory without the full path. 
+ + For example, path('/usr/local/lib/libpython.so').name == 'libpython.so' + """) + + namebase = property( + _get_namebase, None, None, + """ The same as path.name, but with one file extension stripped off. + + For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz', + but path('/home/guido/python.tar.gz').namebase == 'python.tar' + """) + + ext = property( + _get_ext, None, None, + """ The file extension, for example '.py'. """) + + drive = property( + _get_drive, None, None, + """ The drive specifier, for example 'C:'. + This is always empty on systems that don't use drive specifiers. + """) + + def splitpath(self): + """ p.splitpath() -> Return (p.parent, p.name). """ + parent, child = os.path.split(self) + return self.__class__(parent), child + + def splitdrive(self): + """ p.splitdrive() -> Return (p.drive, ). + + Split the drive specifier from this path. If there is + no drive specifier, p.drive is empty, so the return value + is simply (path(''), p). This is always the case on Unix. + """ + drive, rel = os.path.splitdrive(self) + return self.__class__(drive), rel + + def splitext(self): + """ p.splitext() -> Return (p.stripext(), p.ext). + + Split the filename extension from this path and return + the two parts. Either part may be empty. + + The extension is everything from '.' to the end of the + last path segment. This has the property that if + (a, b) == p.splitext(), then a + b == p. + """ + filename, ext = os.path.splitext(self) + return self.__class__(filename), ext + + def stripext(self): + """ p.stripext() -> Remove one file extension from the path. + + For example, path('/home/guido/python.tar.gz').stripext() + returns path('/home/guido/python.tar'). 
+ """ + return self.splitext()[0] + + if hasattr(os.path, 'splitunc'): + def splitunc(self): + unc, rest = os.path.splitunc(self) + return self.__class__(unc), rest + + def _get_uncshare(self): + unc, r = os.path.splitunc(self) + return self.__class__(unc) + + uncshare = property( + _get_uncshare, None, None, + """ The UNC mount point for this path. + This is empty for paths on local drives. """) + + def joinpath(self, *args): + """ Join two or more path components, adding a separator + character (os.sep) if needed. Returns a new path + object. + """ + return self.__class__(os.path.join(self, *args)) + + def splitall(self): + r""" Return a list of the path components in this path. + + The first item in the list will be a path. Its value will be + either os.curdir, os.pardir, empty, or the root directory of + this path (for example, '/' or 'C:\\'). The other items in + the list will be strings. + + path.path.joinpath(*result) will yield the original path. + """ + parts = [] + loc = self + while loc != os.curdir and loc != os.pardir: + prev = loc + loc, child = prev.splitpath() + if loc == prev: + break + parts.append(child) + parts.append(loc) + parts.reverse() + return parts + + def relpath(self): + """ Return this path as a relative path, + based from the current working directory. + """ + cwd = self.__class__(os.getcwd()) + return cwd.relpathto(self) + + def relpathto(self, dest): + """ Return a relative path from self to dest. + + If there is no relative path from self to dest, for example if + they reside on different drives in Windows, then this returns + dest.abspath(). + """ + origin = self.abspath() + dest = self.__class__(dest).abspath() + + orig_list = origin.normcase().splitall() + # Don't normcase dest! We want to preserve the case. + dest_list = dest.splitall() + + if orig_list[0] != os.path.normcase(dest_list[0]): + # Can't get here from there. + return dest + + # Find the location where the two paths start to differ. 
+ i = 0 + for start_seg, dest_seg in zip(orig_list, dest_list): + if start_seg != os.path.normcase(dest_seg): + break + i += 1 + + # Now i is the point where the two paths diverge. + # Need a certain number of "os.pardir"s to work up + # from the origin to the point of divergence. + segments = [os.pardir] * (len(orig_list) - i) + # Need to add the diverging part of dest_list. + segments += dest_list[i:] + if len(segments) == 0: + # If they happen to be identical, use os.curdir. + relpath = os.curdir + else: + relpath = os.path.join(*segments) + return self.__class__(relpath) + + # --- Listing, searching, walking, and matching + + def listdir(self, pattern=None): + """ D.listdir() -> List of items in this directory. + + Use D.files() or D.dirs() instead if you want a listing + of just files or just subdirectories. + + The elements of the list are path objects. + + With the optional 'pattern' argument, this only lists + items whose names match the given pattern. + """ + names = os.listdir(self) + if pattern is not None: + names = fnmatch.filter(names, pattern) + return [self / child for child in names] + + def dirs(self, pattern=None): + """ D.dirs() -> List of this directory's subdirectories. + + The elements of the list are path objects. + This does not walk recursively into subdirectories + (but see path.walkdirs). + + With the optional 'pattern' argument, this only lists + directories whose names match the given pattern. For + example, d.dirs('build-*'). + """ + return [p for p in self.listdir(pattern) if p.isdir()] + + def files(self, pattern=None): + """ D.files() -> List of the files in this directory. + + The elements of the list are path objects. + This does not walk into subdirectories (see path.walkfiles). + + With the optional 'pattern' argument, this only lists files + whose names match the given pattern. For example, + d.files('*.pyc'). 
+ """ + + return [p for p in self.listdir(pattern) if p.isfile()] + + def walk(self, pattern=None, errors='strict'): + """ D.walk() -> iterator over files and subdirs, recursively. + + The iterator yields path objects naming each child item of + this directory and its descendants. This requires that + D.isdir(). + + This performs a depth-first traversal of the directory tree. + Each directory is returned just before all its children. + + The errors= keyword argument controls behavior when an + error occurs. The default is 'strict', which causes an + exception. The other allowed values are 'warn', which + reports the error via warnings.warn(), and 'ignore'. + """ + if errors not in ('strict', 'warn', 'ignore'): + raise ValueError("invalid errors parameter") + + try: + childList = self.listdir() + except Exception: + if errors == 'ignore': + return + elif errors == 'warn': + warnings.warn( + "Unable to list directory '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + return + else: + raise + + for child in childList: + if pattern is None or child.fnmatch(pattern): + yield child + try: + isdir = child.isdir() + except Exception: + if errors == 'ignore': + isdir = False + elif errors == 'warn': + warnings.warn( + "Unable to access '%s': %s" + % (child, sys.exc_info()[1]), + TreeWalkWarning) + isdir = False + else: + raise + + if isdir: + for item in child.walk(pattern, errors): + yield item + + def walkdirs(self, pattern=None, errors='strict'): + """ D.walkdirs() -> iterator over subdirs, recursively. + + With the optional 'pattern' argument, this yields only + directories whose names match the given pattern. For + example, mydir.walkdirs('*test') yields only directories + with names ending in 'test'. + + The errors= keyword argument controls behavior when an + error occurs. The default is 'strict', which causes an + exception. The other allowed values are 'warn', which + reports the error via warnings.warn(), and 'ignore'. 
+ """ + if errors not in ('strict', 'warn', 'ignore'): + raise ValueError("invalid errors parameter") + + try: + dirs = self.dirs() + except Exception: + if errors == 'ignore': + return + elif errors == 'warn': + warnings.warn( + "Unable to list directory '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + return + else: + raise + + for child in dirs: + if pattern is None or child.fnmatch(pattern): + yield child + for subsubdir in child.walkdirs(pattern, errors): + yield subsubdir + + def walkfiles(self, pattern=None, errors='strict'): + """ D.walkfiles() -> iterator over files in D, recursively. + + The optional argument, pattern, limits the results to files + with names that match the pattern. For example, + mydir.walkfiles('*.tmp') yields only files with the .tmp + extension. + """ + if errors not in ('strict', 'warn', 'ignore'): + raise ValueError("invalid errors parameter") + + try: + childList = self.listdir() + except Exception: + if errors == 'ignore': + return + elif errors == 'warn': + warnings.warn( + "Unable to list directory '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + return + else: + raise + + for child in childList: + try: + isfile = child.isfile() + isdir = not isfile and child.isdir() + except: + if errors == 'ignore': + continue + elif errors == 'warn': + warnings.warn( + "Unable to access '%s': %s" + % (self, sys.exc_info()[1]), + TreeWalkWarning) + continue + else: + raise + + if isfile: + if pattern is None or child.fnmatch(pattern): + yield child + elif isdir: + for f in child.walkfiles(pattern, errors): + yield f + + def fnmatch(self, pattern): + """ Return True if self.name matches the given pattern. + + pattern - A filename pattern with wildcards, + for example '*.py'. + """ + return fnmatch.fnmatch(self.name, pattern) + + def glob(self, pattern): + """ Return a list of path objects that match the pattern. + + pattern - a path relative to this directory, with wildcards. 
+ + For example, path('/users').glob('*/bin/*') returns a list + of all the files users have in their bin directories. + """ + cls = self.__class__ + return [cls(s) for s in glob.glob(_base(self / pattern))] + + + # --- Reading or writing an entire file at once. + + def open(self, mode='r'): + """ Open this file. Return a file object. """ + return file(self, mode) + + def bytes(self): + """ Open this file, read all bytes, return them as a string. """ + f = self.open('rb') + try: + return f.read() + finally: + f.close() + + def write_bytes(self, bytes, append=False): + """ Open this file and write the given bytes to it. + + Default behavior is to overwrite any existing file. + Call p.write_bytes(bytes, append=True) to append instead. + """ + if append: + mode = 'ab' + else: + mode = 'wb' + f = self.open(mode) + try: + f.write(bytes) + finally: + f.close() + + def text(self, encoding=None, errors='strict'): + r""" Open this file, read it in, return the content as a string. + + This uses 'U' mode in Python 2.3 and later, so '\r\n' and '\r' + are automatically translated to '\n'. + + Optional arguments: + + encoding - The Unicode encoding (or character set) of + the file. If present, the content of the file is + decoded and returned as a unicode object; otherwise + it is returned as an 8-bit str. + errors - How to handle Unicode errors; see help(str.decode) + for the options. Default is 'strict'. + """ + if encoding is None: + # 8-bit + f = self.open(_textmode) + try: + return f.read() + finally: + f.close() + else: + # Unicode + f = codecs.open(self, 'r', encoding, errors) + # (Note - Can't use 'U' mode here, since codecs.open + # doesn't support 'U' mode, even in Python 2.3.) 
+ try: + t = f.read() + finally: + f.close() + return (t.replace(u'\r\n', u'\n') + .replace(u'\r\x85', u'\n') + .replace(u'\r', u'\n') + .replace(u'\x85', u'\n') + .replace(u'\u2028', u'\n')) + + def write_text(self, text, encoding=None, errors='strict', linesep=os.linesep, append=False): + r""" Write the given text to this file. + + The default behavior is to overwrite any existing file; + to append instead, use the 'append=True' keyword argument. + + There are two differences between path.write_text() and + path.write_bytes(): newline handling and Unicode handling. + See below. + + Parameters: + + - text - str/unicode - The text to be written. + + - encoding - str - The Unicode encoding that will be used. + This is ignored if 'text' isn't a Unicode string. + + - errors - str - How to handle Unicode encoding errors. + Default is 'strict'. See help(unicode.encode) for the + options. This is ignored if 'text' isn't a Unicode + string. + + - linesep - keyword argument - str/unicode - The sequence of + characters to be used to mark end-of-line. The default is + os.linesep. You can also specify None; this means to + leave all newlines as they are in 'text'. + + - append - keyword argument - bool - Specifies what to do if + the file already exists (True: append to the end of it; + False: overwrite it.) The default is False. + + + --- Newline handling. + + write_text() converts all standard end-of-line sequences + ('\n', '\r', and '\r\n') to your platform's default end-of-line + sequence (see os.linesep; on Windows, for example, the + end-of-line marker is '\r\n'). + + If you don't like your platform's default, you can override it + using the 'linesep=' keyword argument. If you specifically want + write_text() to preserve the newlines as-is, use 'linesep=None'. + + This applies to Unicode text the same as to 8-bit text, except + there are three additional standard Unicode end-of-line sequences: + u'\x85', u'\r\x85', and u'\u2028'. 
+ + (This is slightly different from when you open a file for + writing with fopen(filename, "w") in C or file(filename, 'w') + in Python.) + + + --- Unicode + + If 'text' isn't Unicode, then apart from newline handling, the + bytes are written verbatim to the file. The 'encoding' and + 'errors' arguments are not used and must be omitted. + + If 'text' is Unicode, it is first converted to bytes using the + specified 'encoding' (or the default encoding if 'encoding' + isn't specified). The 'errors' argument applies only to this + conversion. + + """ + if isinstance(text, unicode): + if linesep is not None: + # Convert all standard end-of-line sequences to + # ordinary newline characters. + text = (text.replace(u'\r\n', u'\n') + .replace(u'\r\x85', u'\n') + .replace(u'\r', u'\n') + .replace(u'\x85', u'\n') + .replace(u'\u2028', u'\n')) + text = text.replace(u'\n', linesep) + if encoding is None: + encoding = sys.getdefaultencoding() + bytes = text.encode(encoding, errors) + else: + # It is an error to specify an encoding if 'text' is + # an 8-bit string. + assert encoding is None + + if linesep is not None: + text = (text.replace('\r\n', '\n') + .replace('\r', '\n')) + bytes = text.replace('\n', linesep) + + self.write_bytes(bytes, append) + + def lines(self, encoding=None, errors='strict', retain=True): + r""" Open this file, read all lines, return them in a list. + + Optional arguments: + encoding - The Unicode encoding (or character set) of + the file. The default is None, meaning the content + of the file is read as 8-bit characters and returned + as a list of (non-Unicode) str objects. + errors - How to handle Unicode errors; see help(str.decode) + for the options. Default is 'strict' + retain - If true, retain newline characters; but all newline + character combinations ('\r', '\n', '\r\n') are + translated to '\n'. If false, newline characters are + stripped off. Default is True. + + This uses 'U' mode in Python 2.3 and later. 
+ """ + if encoding is None and retain: + f = self.open(_textmode) + try: + return f.readlines() + finally: + f.close() + else: + return self.text(encoding, errors).splitlines(retain) + + def write_lines(self, lines, encoding=None, errors='strict', + linesep=os.linesep, append=False): + r""" Write the given lines of text to this file. + + By default this overwrites any existing file at this path. + + This puts a platform-specific newline sequence on every line. + See 'linesep' below. + + lines - A list of strings. + + encoding - A Unicode encoding to use. This applies only if + 'lines' contains any Unicode strings. + + errors - How to handle errors in Unicode encoding. This + also applies only to Unicode strings. + + linesep - The desired line-ending. This line-ending is + applied to every line. If a line already has any + standard line ending ('\r', '\n', '\r\n', u'\x85', + u'\r\x85', u'\u2028'), that will be stripped off and + this will be used instead. The default is os.linesep, + which is platform-dependent ('\r\n' on Windows, '\n' on + Unix, etc.) Specify None to write the lines as-is, + like file.writelines(). + + Use the keyword argument append=True to append lines to the + file. The default is to overwrite the file. Warning: + When you use this with Unicode data, if the encoding of the + existing data in the file is different from the encoding + you specify with the encoding= parameter, the result is + mixed-encoding data, which can really confuse someone trying + to read the file later. + """ + if append: + mode = 'ab' + else: + mode = 'wb' + f = self.open(mode) + try: + for line in lines: + isUnicode = isinstance(line, unicode) + if linesep is not None: + # Strip off any existing line-end and add the + # specified linesep string. 
+ if isUnicode: + if line[-2:] in (u'\r\n', u'\x0d\x85'): + line = line[:-2] + elif line[-1:] in (u'\r', u'\n', + u'\x85', u'\u2028'): + line = line[:-1] + else: + if line[-2:] == '\r\n': + line = line[:-2] + elif line[-1:] in ('\r', '\n'): + line = line[:-1] + line += linesep + if isUnicode: + if encoding is None: + encoding = sys.getdefaultencoding() + line = line.encode(encoding, errors) + f.write(line) + finally: + f.close() + + def read_md5(self): + """ Calculate the md5 hash for this file. + + This reads through the entire file. + """ + f = self.open('rb') + try: + m = md5.new() + while True: + d = f.read(8192) + if not d: + break + m.update(d) + finally: + f.close() + return m.digest() + + # --- Methods for querying the filesystem. + + exists = os.path.exists + isdir = os.path.isdir + isfile = os.path.isfile + islink = os.path.islink + ismount = os.path.ismount + + if hasattr(os.path, 'samefile'): + samefile = os.path.samefile + + getatime = os.path.getatime + atime = property( + getatime, None, None, + """ Last access time of the file. """) + + getmtime = os.path.getmtime + mtime = property( + getmtime, None, None, + """ Last-modified time of the file. """) + + if hasattr(os.path, 'getctime'): + getctime = os.path.getctime + ctime = property( + getctime, None, None, + """ Creation time of the file. """) + + getsize = os.path.getsize + size = property( + getsize, None, None, + """ Size of the file, in bytes. """) + + if hasattr(os, 'access'): + def access(self, mode): + """ Return true if current user has access to this path. + + mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK + """ + return os.access(self, mode) + + def stat(self): + """ Perform a stat() system call on this path. """ + return os.stat(self) + + def lstat(self): + """ Like path.stat(), but do not follow symbolic links. """ + return os.lstat(self) + + def get_owner(self): + r""" Return the name of the owner of this file or directory. + + This follows symbolic links. 
+ + On Windows, this returns a name of the form ur'DOMAIN\User Name'. + On Windows, a group can own a file or directory. + """ + if os.name == 'nt': + if win32security is None: + raise Exception("path.owner requires win32all to be installed") + desc = win32security.GetFileSecurity( + self, win32security.OWNER_SECURITY_INFORMATION) + sid = desc.GetSecurityDescriptorOwner() + account, domain, typecode = win32security.LookupAccountSid(None, sid) + return domain + u'\\' + account + else: + if pwd is None: + raise NotImplementedError("path.owner is not implemented on this platform.") + st = self.stat() + return pwd.getpwuid(st.st_uid).pw_name + + owner = property( + get_owner, None, None, + """ Name of the owner of this file or directory. """) + + if hasattr(os, 'statvfs'): + def statvfs(self): + """ Perform a statvfs() system call on this path. """ + return os.statvfs(self) + + if hasattr(os, 'pathconf'): + def pathconf(self, name): + return os.pathconf(self, name) + + + # --- Modifying operations on files and directories + + def utime(self, times): + """ Set the access and modified times of this file. """ + os.utime(self, times) + + def chmod(self, mode): + os.chmod(self, mode) + + if hasattr(os, 'chown'): + def chown(self, uid, gid): + os.chown(self, uid, gid) + + def rename(self, new): + os.rename(self, new) + + def renames(self, new): + os.renames(self, new) + + + # --- Create/delete operations on directories + + def mkdir(self, mode=0777): + os.mkdir(self, mode) + + def makedirs(self, mode=0777): + os.makedirs(self, mode) + + def rmdir(self): + os.rmdir(self) + + def removedirs(self): + os.removedirs(self) + + + # --- Modifying operations on files + + def touch(self): + """ Set the access/modified times of this file to the current time. + Create the file if it does not exist. 
+ """ + fd = os.open(self, os.O_WRONLY | os.O_CREAT, 0666) + os.close(fd) + os.utime(self, None) + + def remove(self): + os.remove(self) + + def unlink(self): + os.unlink(self) + + + # --- Links + + if hasattr(os, 'link'): + def link(self, newpath): + """ Create a hard link at 'newpath', pointing to this file. """ + os.link(self, newpath) + + if hasattr(os, 'symlink'): + def symlink(self, newlink): + """ Create a symbolic link at 'newlink', pointing here. """ + os.symlink(self, newlink) + + if hasattr(os, 'readlink'): + def readlink(self): + """ Return the path to which this symbolic link points. + + The result may be an absolute or a relative path. + """ + return self.__class__(os.readlink(self)) + + def readlinkabs(self): + """ Return the path to which this symbolic link points. + + The result is always an absolute path. + """ + p = self.readlink() + if p.isabs(): + return p + else: + return (self.parent / p).abspath() + + + # --- High-level functions from shutil + + copyfile = shutil.copyfile + copymode = shutil.copymode + copystat = shutil.copystat + copy = shutil.copy + copy2 = shutil.copy2 + copytree = shutil.copytree + if hasattr(shutil, 'move'): + move = shutil.move + rmtree = shutil.rmtree + + + # --- Special stuff from os + + if hasattr(os, 'chroot'): + def chroot(self): + os.chroot(self) + + if hasattr(os, 'startfile'): + def startfile(self): + os.startfile(self) +