From: Joachim Breitner
Date: Wed, 29 Jul 2009 16:25:49 +0000 (+0200)
Subject: Bundle deb822.py from python-debian
X-Git-Url: https://git.donarmstrong.com/?p=wannabuild.git;a=commitdiff_plain;h=4febee82da34df21413dfb34f1b3cf8943455065

Bundle deb822.py from python-debian

This module does not support all features needed by add-sources.py on stable, so for now we inline it.
---

diff --git a/bin/add-sources.py b/bin/add-sources.py
index 6f63260..2c36222 100755
--- a/bin/add-sources.py
+++ b/bin/add-sources.py
@@ -1,4 +1,5 @@
 #!/usr/bin/python
+# vim: fileencoding=utf-8
 
 # Given as input a Packages and a Sources file, produces as output a new
 # Packages containing fake packages which are installable if and only if the
@@ -16,7 +17,1074 @@ import string
 import sys
 from optparse import OptionParser
 
-from debian_bundle import deb822
+#from debian_bundle import deb822
+
+# vim: fileencoding=utf-8
+#
+# A python interface for various rfc822-like formatted files used by Debian
+# (.changes, .dsc, Packages, Sources, etc)
+#
+# Copyright (C) 2005-2006 dann frazier
+# Copyright (C) 2006-2008 John Wright
+# Copyright (C) 2006 Adeodato Simó
+# Copyright (C) 2008 Stefano Zacchiroli
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+def function_deprecated_by(x): return x
+
+try:
+    import apt_pkg
+    _have_apt_pkg = True
+except ImportError:
+    _have_apt_pkg = False
+
+import new
+import re
+import string
+import sys
+import StringIO
+import UserDict
+
+class OrderedSet(object):
+    """A set-like object that preserves order when iterating over it
+
+    We use this to keep track of keys in Deb822Dict, because it's much faster
+    to look up if a key is in a set than in a list.
+    """
+
+    def __init__(self, iterable=[]):
+        self.__set = set()
+        self.__order = []
+        for item in iterable:
+            self.add(item)
+
+    def add(self, item):
+        if item not in self:
+            # set.add will raise TypeError if something's unhashable, so we
+            # don't have to handle that ourselves
+            self.__set.add(item)
+            self.__order.append(item)
+
+    def remove(self, item):
+        # set.remove will raise KeyError, so we don't need to handle that
+        # ourselves
+        self.__set.remove(item)
+        self.__order.remove(item)
+
+    def __iter__(self):
+        # Return an iterator of items in the order they were added
+        return iter(self.__order)
+
+    def __contains__(self, item):
+        # This is what makes OrderedSet faster than using a list to keep track
+        # of keys. Lookup in a set is O(1) instead of O(n) for a list.
+        return item in self.__set
+
+    ### list-like methods
+    append = add
+
+    def extend(self, iterable):
+        for item in iterable:
+            self.add(item)
+    ###
+
+class Deb822Dict(object, UserDict.DictMixin):
+    # Subclassing UserDict.DictMixin because we're overriding so much dict
+    # functionality that subclassing dict requires overriding many more than
+    # the four methods that DictMixin requires.
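+    #
+    # A quick illustration of the OrderedSet-backed key handling (the field
+    # names and values below are made up, not taken from a real Packages file):
+    #
+    #     d = Deb822Dict()
+    #     d['Package'] = 'foo'
+    #     d['Version'] = '1.0'
+    #     d.keys()      # -> ['Package', 'Version'], in insertion order
+    #     d['version']  # -> '1.0', since key lookup is case-insensitive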
+ """A dictionary-like object suitable for storing RFC822-like data. + + Deb822Dict behaves like a normal dict, except: + - key lookup is case-insensitive + - key order is preserved + - if initialized with a _parsed parameter, it will pull values from + that dictionary-like object as needed (rather than making a copy). + The _parsed dict is expected to be able to handle case-insensitive + keys. + + If _parsed is not None, an optional _fields parameter specifies which keys + in the _parsed dictionary are exposed. + """ + + # See the end of the file for the definition of _strI + + def __init__(self, _dict=None, _parsed=None, _fields=None): + self.__dict = {} + self.__keys = OrderedSet() + self.__parsed = None + + if _dict is not None: + # _dict may be a dict or a list of two-sized tuples + if hasattr(_dict, 'items'): + items = _dict.items() + else: + items = list(_dict) + + try: + for k, v in items: + self[k] = v + except ValueError: + this = len(self.__keys) + len_ = len(items[this]) + raise ValueError('dictionary update sequence element #%d has ' + 'length %d; 2 is required' % (this, len_)) + + if _parsed is not None: + self.__parsed = _parsed + if _fields is None: + self.__keys.extend([ _strI(k) for k in self.__parsed.keys() ]) + else: + self.__keys.extend([ _strI(f) for f in _fields if self.__parsed.has_key(f) ]) + + ### BEGIN DictMixin methods + + def __setitem__(self, key, value): + key = _strI(key) + self.__keys.add(key) + self.__dict[key] = value + + def __getitem__(self, key): + key = _strI(key) + try: + return self.__dict[key] + except KeyError: + if self.__parsed is not None and key in self.__keys: + return self.__parsed[key] + else: + raise + + def __delitem__(self, key): + key = _strI(key) + self.__keys.remove(key) + try: + del self.__dict[key] + except KeyError: + # If we got this far, the key was in self.__keys, so it must have + # only been in the self.__parsed dict. + pass + + def has_key(self, key): + key = _strI(key) + return key in self.__keys + + def keys(self): + return [str(key) for key in self.__keys] + + ### END DictMixin methods + + def __repr__(self): + return '{%s}' % ', '.join(['%r: %r' % (k, v) for k, v in self.items()]) + + def __eq__(self, other): + mykeys = self.keys(); mykeys.sort() + otherkeys = other.keys(); otherkeys.sort() + if not mykeys == otherkeys: + return False + + for key in mykeys: + if self[key] != other[key]: + return False + + # If we got here, everything matched + return True + + def copy(self): + # Use self.__class__ so this works as expected for subclasses + copy = self.__class__(self) + return copy + + # TODO implement __str__() and make dump() use that? + + +class Deb822(Deb822Dict): + + def __init__(self, sequence=None, fields=None, _parsed=None): + """Create a new Deb822 instance. + + :param sequence: a string, or any any object that returns a line of + input each time, normally a file(). Alternately, sequence can + be a dict that contains the initial key-value pairs. + + :param fields: if given, it is interpreted as a list of fields that + should be parsed (the rest will be discarded). + + :param _parsed: internal parameter. 
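+
+        A minimal usage sketch (the 'Packages' file name and the field used
+        below are only illustrative):
+
+            d = Deb822(file('Packages'))   # parses the first paragraph only
+            print d['package']             # field lookup is case-insensitive
+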
+ """ + + if hasattr(sequence, 'items'): + _dict = sequence + sequence = None + else: + _dict = None + Deb822Dict.__init__(self, _dict=_dict, _parsed=_parsed, _fields=fields) + + if sequence is not None: + try: + self._internal_parser(sequence, fields) + except EOFError: + pass + + self.gpg_info = None + + def iter_paragraphs(cls, sequence, fields=None, use_apt_pkg=True, + shared_storage=False): + """Generator that yields a Deb822 object for each paragraph in sequence. + + :param sequence: same as in __init__. + + :param fields: likewise. + + :param use_apt_pkg: if sequence is a file(), apt_pkg will be used + if available to parse the file, since it's much much faster. Set + this parameter to False to disable using apt_pkg. + :param shared_storage: if sequence is a file(), use_apt_pkg is True, + and shared_storage is True, yielded objects will share storage, so + they can't be kept across iterations. (Also, PGP signatures won't + be stripped.) By default, this parameter is False, causing a copy + of the parsed data to be made through each iteration. Except for + with raw Deb822 paragraphs (as opposed to _multivalued subclasses), + the speed gained by setting shared_storage=True is marginal. This + parameter has no effect if use_apt_pkg is False or apt_pkg is not + available. + """ + + if _have_apt_pkg and use_apt_pkg and isinstance(sequence, file): + parser = apt_pkg.ParseTagFile(sequence) + while parser.Step() == 1: + if shared_storage: + parsed = parser.Section + else: + # Since parser.Section doesn't have an items method, we + # need to imitate that method here and make a Deb822Dict + # from the result in order to preserve order. + items = [(key, parser.Section[key]) + for key in parser.Section.keys()] + parsed = Deb822Dict(items) + yield cls(fields=fields, _parsed=parsed) + + else: + iterable = iter(sequence) + x = cls(iterable, fields) + while len(x) != 0: + yield x + x = cls(iterable, fields) + + iter_paragraphs = classmethod(iter_paragraphs) + + ### + + def _internal_parser(self, sequence, fields=None): + single = re.compile("^(?P\S+)\s*:\s*(?P\S.*?)\s*$") + multi = re.compile("^(?P\S+)\s*:\s*$") + multidata = re.compile("^\s(?P.+?)\s*$") + + wanted_field = lambda f: fields is None or f in fields + + if isinstance(sequence, basestring): + sequence = sequence.splitlines() + + curkey = None + content = "" + for line in self.gpg_stripped_paragraph(sequence): + m = single.match(line) + if m: + if curkey: + self[curkey] += content + + if not wanted_field(m.group('key')): + curkey = None + continue + + curkey = m.group('key') + self[curkey] = m.group('data') + content = "" + continue + + m = multi.match(line) + if m: + if curkey: + self[curkey] += content + + if not wanted_field(m.group('key')): + curkey = None + continue + + curkey = m.group('key') + self[curkey] = "" + content = "" + continue + + m = multidata.match(line) + if m: + content += '\n' + line # XXX not m.group('data')? + continue + + if curkey: + self[curkey] += content + + def __str__(self): + return self.dump() + + # __repr__ is handled by Deb822Dict + + def dump(self, fd=None): + """Dump the the contents in the original format + + If fd is None, return a string. + """ + + if fd is None: + fd = StringIO.StringIO() + return_string = True + else: + return_string = False + for key, value in self.iteritems(): + if not value or value[0] == '\n': + # Avoid trailing whitespace after "Field:" if it's on its own + # line or the value is empty + # XXX Uh, really print value if value == '\n'? 
+ fd.write('%s:%s\n' % (key, value)) + else: + fd.write('%s: %s\n' % (key, value)) + if return_string: + return fd.getvalue() + + ### + + def is_single_line(self, s): + if s.count("\n"): + return False + else: + return True + + isSingleLine = function_deprecated_by(is_single_line) + + def is_multi_line(self, s): + return not self.is_single_line(s) + + isMultiLine = function_deprecated_by(is_multi_line) + + def _merge_fields(self, s1, s2): + if not s2: + return s1 + if not s1: + return s2 + + if self.is_single_line(s1) and self.is_single_line(s2): + ## some fields are delimited by a single space, others + ## a comma followed by a space. this heuristic assumes + ## that there are multiple items in one of the string fields + ## so that we can pick up on the delimiter being used + delim = ' ' + if (s1 + s2).count(', '): + delim = ', ' + + L = (s1 + delim + s2).split(delim) + L.sort() + + prev = merged = L[0] + + for item in L[1:]: + ## skip duplicate entries + if item == prev: + continue + merged = merged + delim + item + prev = item + return merged + + if self.is_multi_line(s1) and self.is_multi_line(s2): + for item in s2.splitlines(True): + if item not in s1.splitlines(True): + s1 = s1 + "\n" + item + return s1 + + raise ValueError + + _mergeFields = function_deprecated_by(_merge_fields) + + def merge_fields(self, key, d1, d2=None): + ## this method can work in two ways - abstract that away + if d2 == None: + x1 = self + x2 = d1 + else: + x1 = d1 + x2 = d2 + + ## we only have to do work if both objects contain our key + ## otherwise, we just take the one that does, or raise an + ## exception if neither does + if key in x1 and key in x2: + merged = self._mergeFields(x1[key], x2[key]) + elif key in x1: + merged = x1[key] + elif key in x2: + merged = x2[key] + else: + raise KeyError + + ## back to the two different ways - if this method was called + ## upon an object, update that object in place. + ## return nothing in this case, to make the author notice a + ## problem if she assumes the object itself will not be modified + if d2 == None: + self[key] = merged + return None + + return merged + + mergeFields = function_deprecated_by(merge_fields) + + def split_gpg_and_payload(sequence): + """Return a (gpg_pre, payload, gpg_post) tuple + + Each element of the returned tuple is a list of lines (with trailing + whitespace stripped). 
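+
+        For illustration, a clearsigned paragraph roughly splits as (field
+        names and values made up):
+
+            (['-----BEGIN PGP SIGNED MESSAGE-----', 'Hash: SHA1'],
+             ['Source: foo', 'Version: 1.0'],
+             ['-----BEGIN PGP SIGNATURE-----', '...', '-----END PGP SIGNATURE-----'])
+
+        For unsigned input the first and last lists are empty.
+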
+ """ + + gpg_pre_lines = [] + lines = [] + gpg_post_lines = [] + state = 'SAFE' + gpgre = re.compile(r'^-----(?PBEGIN|END) PGP (?P[^-]+)-----$') + blank_line = re.compile('^$') + first_line = True + + for line in sequence: + line = line.strip('\r\n') + + # skip initial blank lines, if any + if first_line: + if blank_line.match(line): + continue + else: + first_line = False + + m = gpgre.match(line) + + if not m: + if state == 'SAFE': + if not blank_line.match(line): + lines.append(line) + else: + if not gpg_pre_lines: + # There's no gpg signature, so we should stop at + # this blank line + break + elif state == 'SIGNED MESSAGE': + if blank_line.match(line): + state = 'SAFE' + else: + gpg_pre_lines.append(line) + elif state == 'SIGNATURE': + gpg_post_lines.append(line) + else: + if m.group('action') == 'BEGIN': + state = m.group('what') + elif m.group('action') == 'END': + gpg_post_lines.append(line) + break + if not blank_line.match(line): + if not lines: + gpg_pre_lines.append(line) + else: + gpg_post_lines.append(line) + + if len(lines): + return (gpg_pre_lines, lines, gpg_post_lines) + else: + raise EOFError('only blank lines found in input') + + split_gpg_and_payload = staticmethod(split_gpg_and_payload) + + def gpg_stripped_paragraph(cls, sequence): + return cls.split_gpg_and_payload(sequence)[1] + + gpg_stripped_paragraph = classmethod(gpg_stripped_paragraph) + + def get_gpg_info(self): + """Return a GpgInfo object with GPG signature information + + This method will raise ValueError if the signature is not available + (e.g. the original text cannot be found)""" + + # raw_text is saved (as a string) only for Changes and Dsc (see + # _gpg_multivalued.__init__) which is small compared to Packages or + # Sources which contain no signature + if not hasattr(self, 'raw_text'): + raise ValueError, "original text cannot be found" + + if self.gpg_info is None: + self.gpg_info = GpgInfo.from_sequence(self.raw_text) + + return self.gpg_info + +### + +# XXX check what happens if input contains more that one signature +class GpgInfo(dict): + """A wrapper around gnupg parsable output obtained via --status-fd + + This class is really a dictionary containing parsed output from gnupg plus + some methods to make sense of the data. + Keys are keywords and values are arguments suitably splitted. + See /usr/share/doc/gnupg/DETAILS.gz""" + + # keys with format "key keyid uid" + uidkeys = ('GOODSIG', 'EXPSIG', 'EXPKEYSIG', 'REVKEYSIG', 'BADSIG') + + def valid(self): + """Is the signature valid?""" + return self.has_key('GOODSIG') or self.has_key('VALIDSIG') + +# XXX implement as a property? +# XXX handle utf-8 %-encoding + def uid(self): + """Return the primary ID of the signee key, None is not available""" + pass + + @staticmethod + def from_output(out, err=None): + """Create a new GpgInfo object from gpg(v) --status-fd output (out) and + optionally collect stderr as well (err). 
+ + Both out and err can be lines in newline-terminated sequence or regular strings.""" + + n = GpgInfo() + + if isinstance(out, basestring): + out = out.split('\n') + if isinstance(err, basestring): + err = err.split('\n') + + n.out = out + n.err = err + + header = '[GNUPG:] ' + for l in out: + if not l.startswith(header): + continue + + l = l[len(header):] + l = l.strip('\n') + + # str.partition() would be better, 2.5 only though + s = l.find(' ') + key = l[:s] + if key in GpgInfo.uidkeys: + # value is "keyid UID", don't split UID + value = l[s+1:].split(' ', 1) + else: + value = l[s+1:].split(' ') + + n[key] = value + return n + +# XXX how to handle sequences of lines? file() returns \n-terminated + @staticmethod + def from_sequence(sequence, keyrings=['/usr/share/keyrings/debian-keyring.gpg'], + executable=["/usr/bin/gpgv"]): + """Create a new GpgInfo object from the given sequence. + + Sequence is a sequence of lines or a string + executable is a list of args for subprocess.Popen, the first element being the gpg executable""" + + # XXX check for gpg as well and use --verify accordingly? + args = executable + #args.extend(["--status-fd", "1", "--no-default-keyring"]) + args.extend(["--status-fd", "1"]) + import os + [args.extend(["--keyring", k]) for k in keyrings if os.path.isfile(k) and os.access(k, os.R_OK)] + + if "--keyring" not in args: + raise IOError, "cannot access none of given keyrings" + + import subprocess + p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # XXX what to do with exit code? + + if isinstance(sequence, basestring): + (out, err) = p.communicate(sequence) + else: + (out, err) = p.communicate("\n".join(sequence)) + + return GpgInfo.from_output(out, err) + + @staticmethod + def from_file(target, *args): + """Create a new GpgInfo object from the given file, calls from_sequence(file(target), *args)""" + return from_sequence(file(target), *args) + +### + +class PkgRelation(object): + """Inter-package relationships + + Structured representation of the relationships of a package to another, + i.e. of what can appear in a Deb882 field like Depends, Recommends, + Suggests, ... (see Debian Policy 7.1). + """ + + # XXX *NOT* a real dependency parser, and that is not even a goal here, we + # just parse as much as we need to split the various parts composing a + # dependency, checking their correctness wrt policy is out of scope + __dep_RE = re.compile( \ + r'^\s*(?P[a-zA-Z0-9.+\-]{2,})(\s*\(\s*(?P[>=<]+)\s*(?P[0-9a-zA-Z:\-+~.]+)\s*\))?(\s*\[(?P[\s!\w\-]+)\])?\s*$') + __comma_sep_RE = re.compile(r'\s*,\s*') + __pipe_sep_RE = re.compile(r'\s*\|\s*') + __blank_sep_RE = re.compile(r'\s*') + + @classmethod + def parse_relations(cls, raw): + """Parse a package relationship string (i.e. the value of a field like + Depends, Recommends, Build-Depends ...) + """ + def parse_archs(raw): + # assumption: no space beween '!' 
and architecture name + archs = [] + for arch in cls.__blank_sep_RE.split(raw.strip()): + if len(arch) and arch[0] == '!': + archs.append((False, arch[1:])) + else: + archs.append((True, arch)) + return archs + + def parse_rel(raw): + match = cls.__dep_RE.match(raw) + if match: + parts = match.groupdict() + d = { 'name': parts['name'] } + if not (parts['relop'] is None or parts['version'] is None): + d['version'] = (parts['relop'], parts['version']) + else: + d['version'] = None + if parts['archs'] is None: + d['arch'] = None + else: + d['arch'] = parse_archs(parts['archs']) + return d + else: + print >> sys.stderr, \ + 'deb822.py: WARNING: cannot parse package' \ + ' relationship "%s", returning it raw' % raw + return { 'name': raw, 'version': None, 'arch': None } + + tl_deps = cls.__comma_sep_RE.split(raw.strip()) # top-level deps + cnf = map(cls.__pipe_sep_RE.split, tl_deps) + return map(lambda or_deps: map(parse_rel, or_deps), cnf) + + @staticmethod + def str(rels): + """Format to string structured inter-package relationships + + Perform the inverse operation of parse_relations, returning a string + suitable to be written in a package stanza. + """ + def pp_arch(arch_spec): + (excl, arch) = arch_spec + if excl: + return arch + else: + return '!' + arch + + def pp_atomic_dep(dep): + s = dep['name'] + if dep.has_key('version') and dep['version'] is not None: + s += ' (%s %s)' % dep['version'] + if dep.has_key('arch') and dep['arch'] is not None: + s += ' [%s]' % string.join(map(pp_arch, dep['arch'])) + return s + + pp_or_dep = lambda deps: string.join(map(pp_atomic_dep, deps), ' | ') + return string.join(map(pp_or_dep, rels), ', ') + + +class _lowercase_dict(dict): + """Dictionary wrapper which lowercase keys upon lookup.""" + + def __getitem__(self, key): + return dict.__getitem__(self, key.lower()) + + +class _PkgRelationMixin(object): + """Package relationship mixin + + Inheriting from this mixin you can extend a Deb882 object with attributes + letting you access inter-package relationship in a structured way, rather + than as strings. For example, while you can usually use pkg['depends'] to + obtain the Depends string of package pkg, mixing in with this class you + gain pkg.depends to access Depends as a Pkgrel instance + + To use, subclass _PkgRelationMixin from a class with a _relationship_fields + attribute. It should be a list of field names for which structured access + is desired; for each of them a method wild be added to the inherited class. + The method name will be the lowercase version of field name; '-' will be + mangled as '_'. The method would return relationships in the same format of + the PkgRelation' relations property. + + See Packages and Sources as examples. + """ + + def __init__(self, *args, **kwargs): + self.__relations = _lowercase_dict({}) + self.__parsed_relations = False + for name in self._relationship_fields: + # To avoid reimplementing Deb822 key lookup logic we use a really + # simple dict subclass which just lowercase keys upon lookup. Since + # dictionary building happens only here, we ensure that all keys + # are in fact lowercase. + # With this trick we enable users to use the same key (i.e. field + # name) of Deb822 objects on the dictionary returned by the + # relations property. 
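+            # For example (field and value made up): a paragraph carrying
+            # "Depends: foo" can then be read both as pkg['Depends'] and as
+            # pkg.relations['depends'].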
+ keyname = name.lower() + if self.has_key(name): + self.__relations[keyname] = None # lazy value + # all lazy values will be expanded before setting + # __parsed_relations to True + else: + self.__relations[keyname] = [] + + @property + def relations(self): + """Return a dictionary of inter-package relationships among the current + and other packages. + + Dictionary keys depend on the package kind. Binary packages have keys + like 'depends', 'recommends', ... while source packages have keys like + 'build-depends', 'build-depends-indep' and so on. See the Debian policy + for the comprehensive field list. + + Dictionary values are package relationships returned as lists of lists + of dictionaries (see below for some examples). + + The encoding of package relationships is as follows: + - the top-level lists corresponds to the comma-separated list of + Deb822, their components form a conjuction, i.e. they have to be + AND-ed together + - the inner lists corresponds to the pipe-separated list of Deb822, + their components form a disjunction, i.e. they have to be OR-ed + together + - member of the inner lists are dictionaries with the following keys: + - name: package (or virtual package) name + - version: A pair if the relationship is + versioned, None otherwise. operator is one of "<<", + "<=", "=", ">=", ">>"; version is the given version as + a string. + - arch: A list of pairs if the + relationship is architecture specific, None otherwise. + Polarity is a boolean (false if the architecture is + negated with "!", true otherwise), architecture the + Debian archtiecture name as a string. + + Examples: + + "emacs | emacsen, make, debianutils (>= 1.7)" becomes + [ [ {'name': 'emacs'}, {'name': 'emacsen'} ], + [ {'name': 'make'} ], + [ {'name': 'debianutils', 'version': ('>=', '1.7')} ] ] + + "tcl8.4-dev, procps [!hurd-i386]" becomes + [ [ {'name': 'tcl8.4-dev'} ], + [ {'name': 'procps', 'arch': (false, 'hurd-i386')} ] ] + """ + if not self.__parsed_relations: + lazy_rels = filter(lambda n: self.__relations[n] is None, + self.__relations.keys()) + for n in lazy_rels: + self.__relations[n] = PkgRelation.parse_relations(self[n]) + self.__parsed_relations = True + return self.__relations + +class _multivalued(Deb822): + """A class with (R/W) support for multivalued fields. + + To use, create a subclass with a _multivalued_fields attribute. It should + be a dictionary with *lower-case* keys, with lists of human-readable + identifiers of the fields as the values. Please see Dsc, Changes, and + PdiffIndex as examples. + """ + + def __init__(self, *args, **kwargs): + Deb822.__init__(self, *args, **kwargs) + + for field, fields in self._multivalued_fields.items(): + try: + contents = self[field] + except KeyError: + continue + + if self.is_multi_line(contents): + self[field] = [] + updater_method = self[field].append + else: + self[field] = Deb822Dict() + updater_method = self[field].update + + for line in filter(None, contents.splitlines()): + updater_method(Deb822Dict(zip(fields, line.split()))) + + def dump(self, fd=None): + """Dump the contents in the original format + + If fd is None, return a string. + """ + + if fd is None: + fd = StringIO.StringIO() + return_string = True + else: + return_string = False + for key in self.keys(): + keyl = key.lower() + if keyl not in self._multivalued_fields: + value = self[key] + if not value or value[0] == '\n': + # XXX Uh, really print value if value == '\n'? 
+ fd.write('%s:%s\n' % (key, value)) + else: + fd.write('%s: %s\n' % (key, value)) + else: + fd.write(key + ":") + if hasattr(self[key], 'keys'): # single-line + array = [ self[key] ] + else: # multi-line + fd.write("\n") + array = self[key] + + order = self._multivalued_fields[keyl] + try: + field_lengths = self._fixed_field_lengths + except AttributeError: + field_lengths = {} + for item in array: + for x in order: + raw_value = str(item[x]) + try: + length = field_lengths[keyl][x] + except KeyError: + value = raw_value + else: + value = (length - len(raw_value)) * " " + raw_value + fd.write(" %s" % value) + fd.write("\n") + if return_string: + return fd.getvalue() + + +### + + +class _gpg_multivalued(_multivalued): + """A _multivalued class that can support gpg signed objects + + This class's feature is that it stores the raw text before parsing so that + gpg can verify the signature. Use it just like you would use the + _multivalued class. + + This class only stores raw text if it is given a raw string, or if it + detects a gpg signature when given a file or sequence of lines (see + Deb822.split_gpg_and_payload for details). + """ + + def __init__(self, *args, **kwargs): + try: + sequence = args[0] + except IndexError: + sequence = kwargs.get("sequence", None) + + if sequence is not None: + if isinstance(sequence, basestring): + self.raw_text = sequence + elif hasattr(sequence, "items"): + # sequence is actually a dict(-like) object, so we don't have + # the raw text. + pass + else: + try: + gpg_pre_lines, lines, gpg_post_lines = \ + self.split_gpg_and_payload(sequence) + except EOFError: + # Empty input + gpg_pre_lines = lines = gpg_post_lines = [] + if gpg_pre_lines and gpg_post_lines: + raw_text = StringIO.StringIO() + raw_text.write("\n".join(gpg_pre_lines)) + raw_text.write("\n\n") + raw_text.write("\n".join(lines)) + raw_text.write("\n\n") + raw_text.write("\n".join(gpg_post_lines)) + self.raw_text = raw_text.getvalue() + try: + args = list(args) + args[0] = lines + except IndexError: + kwargs["sequence"] = lines + + _multivalued.__init__(self, *args, **kwargs) + + +class Dsc(_gpg_multivalued): + _multivalued_fields = { + "files": [ "md5sum", "size", "name" ], + "checksums-sha1": ["sha1", "size", "name"], + "checksums-sha256": ["sha256", "size", "name"], + } + + +class Changes(_gpg_multivalued): + _multivalued_fields = { + "files": [ "md5sum", "size", "section", "priority", "name" ], + "checksums-sha1": ["sha1", "size", "name"], + "checksums-sha256": ["sha256", "size", "name"], + } + + def get_pool_path(self): + """Return the path in the pool where the files would be installed""" + + # This is based on the section listed for the first file. While + # it is possible, I think, for a package to provide files in multiple + # sections, I haven't seen it in practice. In any case, this should + # probably detect such a situation and complain, or return a list... 
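+        # For illustration (made-up names): a .changes whose first file is in
+        # section "main" for source package "libfoo" maps to
+        # "pool/main/libf/libfoo".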
+
+        s = self['files'][0]['section']
+
+        try:
+            section, subsection = s.split('/')
+        except ValueError:
+            # main is implicit
+            section = 'main'
+
+        if self['source'].startswith('lib'):
+            subdir = self['source'][:4]
+        else:
+            subdir = self['source'][0]
+
+        return 'pool/%s/%s/%s' % (section, subdir, self['source'])
+
+
+class PdiffIndex(_multivalued):
+    _multivalued_fields = {
+        "sha1-current": [ "SHA1", "size" ],
+        "sha1-history": [ "SHA1", "size", "date" ],
+        "sha1-patches": [ "SHA1", "size", "date" ],
+    }
+
+    @property
+    def _fixed_field_lengths(self):
+        fixed_field_lengths = {}
+        for key in self._multivalued_fields:
+            if hasattr(self[key], 'keys'):
+                # Not multi-line -- don't need to compute the field length for
+                # this one
+                continue
+            length = self._get_size_field_length(key)
+            fixed_field_lengths[key] = {"size": length}
+        return fixed_field_lengths
+
+    def _get_size_field_length(self, key):
+        lengths = [len(str(item['size'])) for item in self[key]]
+        return max(lengths)
+
+
+class Release(_multivalued):
+    """Represents a Release file
+
+    Set the size_field_behavior attribute to "dak" to make the size field
+    length only as long as the longest actual value. The default,
+    "apt-ftparchive" makes the field 16 characters long regardless.
+    """
+    # FIXME: Add support for detecting the behavior of the input, if
+    # constructed from actual 822 text.
+
+    _multivalued_fields = {
+        "md5sum": [ "md5sum", "size", "name" ],
+        "sha1": [ "sha1", "size", "name" ],
+        "sha256": [ "sha256", "size", "name" ],
+    }
+
+    __size_field_behavior = "apt-ftparchive"
+    def set_size_field_behavior(self, value):
+        if value not in ["apt-ftparchive", "dak"]:
+            raise ValueError("size_field_behavior must be either "
+                             "'apt-ftparchive' or 'dak'")
+        else:
+            self.__size_field_behavior = value
+    size_field_behavior = property(lambda self: self.__size_field_behavior,
+                                   set_size_field_behavior)
+
+    @property
+    def _fixed_field_lengths(self):
+        fixed_field_lengths = {}
+        for key in self._multivalued_fields:
+            length = self._get_size_field_length(key)
+            fixed_field_lengths[key] = {"size": length}
+        return fixed_field_lengths
+
+    def _get_size_field_length(self, key):
+        if self.size_field_behavior == "apt-ftparchive":
+            return 16
+        elif self.size_field_behavior == "dak":
+            lengths = [len(str(item['size'])) for item in self[key]]
+            return max(lengths)
+
+
+class Sources(Dsc, _PkgRelationMixin):
+    """Represent an APT source package list"""
+
+    _relationship_fields = [ 'build-depends', 'build-depends-indep',
+            'build-conflicts', 'build-conflicts-indep', 'binary' ]
+
+    def __init__(self, *args, **kwargs):
+        Dsc.__init__(self, *args, **kwargs)
+        _PkgRelationMixin.__init__(self, *args, **kwargs)
+
+
+class Packages(Deb822, _PkgRelationMixin):
+    """Represent an APT binary package list"""
+
+    _relationship_fields = [ 'depends', 'pre-depends', 'recommends',
+            'suggests', 'breaks', 'conflicts', 'provides', 'replaces',
+            'enhances' ]
+
+    def __init__(self, *args, **kwargs):
+        Deb822.__init__(self, *args, **kwargs)
+        _PkgRelationMixin.__init__(self, *args, **kwargs)
+
+###
+
+class _CaseInsensitiveString(str):
+    """Case insensitive string.
+    """
+
+    def __new__(cls, str_):
+        s = str.__new__(cls, str_)
+        s.str_lower = str_.lower()
+        s.str_lower_hash = hash(s.str_lower)
+        return s
+
+    def __hash__(self):
+        return self.str_lower_hash
+
+    def __eq__(self, other):
+        return self.str_lower == other.lower()
+
+    def lower(self):
+        return self.str_lower
+
+_strI = _CaseInsensitiveString
 
 usage = 'Usage: cat Packages | add-sources [OPTION...] Sources ARCH > Packages.new'
 cli = OptionParser(usage=usage)
@@ -32,7 +1100,7 @@ architecture = args[1]
 
 def pkg_of_src(src):
     global architecture, options
-    pkg = deb822.Packages()
+    pkg = Packages()
     pkg['Package'] = options.prefix + src['Package']
 
     def dep_for_me(dep):
@@ -114,7 +1182,7 @@ def pkg_of_src(src):
 for line in sys.stdin:
     print line,
 print
-for src in deb822.Sources.iter_paragraphs(file(sources_file)):
+for src in Sources.iter_paragraphs(file(sources_file)):
     if src['Architecture'] in ['any', 'all'] \
         or architecture in src['Architecture'].split():
         pkg = pkg_of_src(src)
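
A rough usage sketch of the bundled classes (the 'Sources' path and the
'fake-' prefix are placeholders; Sources and Packages are the classes inlined
above, as they are in add-sources.py after this change):

    # Read source paragraphs and emit one fake binary package stanza per
    # source, depending on that source's build dependencies.
    for src in Sources.iter_paragraphs(file('Sources')):
        pkg = Packages()
        pkg['Package'] = 'fake-' + src['Package']
        pkg['Version'] = src['Version']
        pkg['Architecture'] = 'all'
        if src.has_key('Build-Depends'):
            pkg['Depends'] = src['Build-Depends']
        print pkg.dump()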