# Utility functions
# Copyright (C) 2000, 2001, 2002, 2003, 2004 James Troup <james@nocrew.org>
-# $Id: utils.py,v 1.62 2004-01-21 03:48:58 troup Exp $
+# $Id: utils.py,v 1.68 2004-06-23 23:11:47 troup Exp $
################################################################################
################################################################################
-import commands, os, pwd, re, select, socket, shutil, string, sys, tempfile, traceback;
+import commands, encodings.ascii, encodings.utf_8, encodings.latin_1, \
+ email.Header, os, pwd, re, select, socket, shutil, string, sys, \
+ tempfile, traceback;
import apt_pkg;
import db_access;
+# Matches a line that begins with one whitespace character; group 1 is the
+# remainder of the line.
re_multi_line_field = re.compile(r"^\s(.*)");
+# Matches strings made up solely of word characters and '-', '+', '~', '.'.
re_taint_free = re.compile(r"^[-+~\.\w]+$");
-re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\> \t]+)\>");
+# Matches 'name <address>': group 1 is the name (starting and ending on a
+# non-whitespace character), group 2 is everything between '<' and '>'.
+# Unlike the pattern it replaces, the address part may contain spaces/tabs.
+re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\>]+)\>");
+# Message strings; presumably raised as string exceptions elsewhere in the
+# file (their use is outside this excerpt).
changes_parse_error_exc = "Can't parse line in .changes file";
invalid_dsc_format_exc = "Invalid .dsc file";
################################################################################
+class Error(Exception):
+    """Common ancestor of every exception class defined by this module."""
+
+class ParseMaintError(Error):
+    """Signals that a maintainer field could not be parsed.
+
+    Attributes:
+      message -- human-readable description of what went wrong
+    """
+
+    def __init__(self, message):
+        # Keep 'message' directly accessible and mirror it into the standard
+        # 'args' tuple so the default exception formatting shows it too.
+        self.message = message;
+        self.args = (message,);
+
+################################################################################
+
def open_file(filename, mode='r'):
try:
f = open(filename, mode);
if section.find('/') != -1:
component = section.split('/')[0];
- if component.lower() == "non-us" and section.count('/') > 0:
+ if component.lower() == "non-us" and section.find('/') != -1:
s = component + '/' + section.split('/')[1];
if Cnf.has_key("Component::%s" % s): # Avoid e.g. non-US/libs
component = s;
################################################################################
-# Fix the `Maintainer:' field to be an RFC822 compatible address.
-# cf. Debian Policy Manual (D.2.4)
-#
-# 06:28|<Culus> 'The standard sucks, but my tool is supposed to
-# interoperate with it. I know - I'll fix the suckage
-# and make things incompatible!'
+def force_to_utf8(s):
+    """Return 's' as a UTF-8 encoded byte string.
+
+    Input that already decodes cleanly as UTF-8 is handed back untouched;
+    anything else is taken to be ISO-8859-1 and transcoded to UTF-8."""
+    try:
+        # Decoding is only a validity probe; the decoded value is discarded.
+        unicode(s, 'utf-8');
+    except UnicodeError:
+        return unicode(s, 'iso8859-1').encode('utf-8');
+    return s;
+
+def rfc2047_encode(s):
+    """Return 's' in a form suitable for a mail header, per RFC2047.
+
+    Pure ASCII input is returned unchanged.  Otherwise the string is
+    wrapped in an encoded-word, labelled as UTF-8 when it decodes as
+    UTF-8 and as ISO-8859-1 when it does not."""
+    try:
+        encodings.ascii.Codec().decode(s);
+    except UnicodeError:
+        pass;
+    else:
+        # Plain ASCII needs no encoding at all.
+        return s;
+
+    try:
+        encodings.utf_8.Codec().decode(s);
+        return str(email.Header.Header(s, 'utf-8', 998));
+    except UnicodeError:
+        # Not valid UTF-8: fall back to treating the bytes as Latin-1.
+        return str(email.Header.Header(s, 'iso-8859-1', 998));
+
+################################################################################
+
+# <Culus> 'The standard sucks, but my tool is supposed to interoperate
+# with it. I know - I'll fix the suckage and make things
+# incompatible!'
def fix_maintainer (maintainer):
- m = re_parse_maintainer.match(maintainer);
- rfc822 = maintainer;
- name = "";
- email = "";
- if m != None and len(m.groups()) == 2:
+ """Parses a Maintainer or Changed-By field and returns:
+ (1) an RFC822 compatible version,
+ (2) an RFC2047 compatible version,
+ (3) the name
+ (4) the email
+
+The name is forced to UTF-8 for both (1) and (3). If the name field
+contains '.' or ',' (as allowed by Debian policy), (1) and (2) are
+switched to 'email (name)' format.
+
+An empty (or all-whitespace) field yields four empty strings.  Raises
+ParseMaintError if a field containing '<' cannot be parsed, or if the
+email part has no '@' and does not start with 'buildd_'."""
+ maintainer = maintainer.strip()
+ if not maintainer:
+ return ('', '', '', '');
+
+ # No '<' anywhere: the whole field is taken as the email address.
+ if maintainer.find("<") == -1:
+ email = maintainer;
+ name = "";
+ # Entirely wrapped in '<...>': strip the brackets; there is no name.
+ elif (maintainer[0] == "<" and maintainer[-1:] == ">"):
+ email = maintainer[1:-1];
+ name = "";
+ # Otherwise the field must match re_parse_maintainer ('name <email>').
+ else:
+ m = re_parse_maintainer.match(maintainer);
+ if not m:
+ raise ParseMaintError, "Doesn't parse as a valid Maintainer field."
name = m.group(1);
email = m.group(2);
- if name.find(',') != -1 or name.find('.') != -1:
- rfc822 = "%s (%s)" % (email, name);
- return (rfc822, name, email)
+
+ # Get an RFC2047 compliant version of the name
+ rfc2047_name = rfc2047_encode(name);
+
+ # Force the name to be UTF-8
+ name = force_to_utf8(name);
+
+ # A ',' or '.' in the name selects the 'email (name)' layout.
+ if name.find(',') != -1 or name.find('.') != -1:
+ rfc822_maint = "%s (%s)" % (email, name);
+ rfc2047_maint = "%s (%s)" % (email, rfc2047_name);
+ else:
+ rfc822_maint = "%s <%s>" % (name, email);
+ rfc2047_maint = "%s <%s>" % (rfc2047_name, email);
+
+ # Addresses that start with 'buildd_' are allowed to lack an '@'.
+ if email.find("@") == -1 and email.find("buildd_") != 0:
+ raise ParseMaintError, "No @ found in email address part."
+
+ return (rfc822_maint, rfc2047_maint, name, email);
################################################################################
################################################################################
def size_type (c):
+ """Returns 'c' formatted as a whole number followed by a unit suffix.
+
+Values above 10240 are divided by 1024 and labelled ' KB'; if the result
+is still above 10240 it is divided by 1024 again and labelled ' MB'.
+Anything else is labelled ' B'."""
- t = " b";
- if c > 10000:
- c = c / 1000;
- t = " Kb";
- if c > 10000:
- c = c / 1000;
- t = " Mb";
+ t = " B";
+ if c > 10240:
+ c = c / 1024;
+ t = " KB";
+ if c > 10240:
+ c = c / 1024;
+ t = " MB";
return ("%d%s" % (c, t))
################################################################################
################################################################################
+def temp_filename(directory=None, dotprefix=None, perms=0700):
+    """Return a unique filename, pre-created so nobody else can claim it.
+
+    directory -- if non-null, the directory the file is pre-created in;
+                 otherwise the tempfile module's default is used.
+    dotprefix -- if non-null, the filename is prefixed with a '.'.
+    perms     -- permission bits the file is created with.
+
+    Returns the path of the newly created, empty file.  Raises OSError if
+    the file cannot be created (e.g. the name already exists, or the
+    directory is missing or not writable)."""
+
+    # tempfile.tempdir is process-wide state: override it only for the
+    # duration of this call.
+    if directory:
+        old_tempdir = tempfile.tempdir;
+        tempfile.tempdir = directory;
+
+    try:
+        filename = tempfile.mktemp();
+
+        if dotprefix:
+            filename = "%s/.%s" % (os.path.dirname(filename), os.path.basename(filename));
+        # mktemp() only proposes a name; O_EXCL makes the creation fail
+        # rather than reuse a file that appeared in the meantime.
+        fd = os.open(filename, os.O_RDWR|os.O_CREAT|os.O_EXCL, perms);
+        os.close(fd);
+    finally:
+        # Restore the global even when creation fails, so the override
+        # does not leak to every later tempfile user in the process.
+        if directory:
+            tempfile.tempdir = old_tempdir;
+
+    return filename;
+
+################################################################################
+
apt_pkg.init();
Cnf = apt_pkg.newConfiguration();