bin/add-sources.py

   1 #!/usr/bin/python
   2 # vim: fileencoding=utf-8
   3
   4 # Given as input a Packages and a Sources file, produces as output a new
   5 # Packages containing fake packages which are installable if and only if the
   6 # corresponding source package has satisfiable build dependencies.
   7
   8 # Copyright (C) 2008 Stefano Zacchiroli <zack@debian.org>
   9 # This program is free software: you can redistribute it and/or modify it under
  10 # the terms of the GNU General Public License as published by the Free Software
  11 # Foundation, either version 3 of the License, or (at your option) any later
  12 # version.
  13
  14 # $Id: add-sources.py 5957 2008-08-16 18:32:17Z zack $
  15
  16 import string
  17 import sys
  18
  19 from optparse import OptionParser
  20 #from debian_bundle import deb822
  21
  22 # vim: fileencoding=utf-8
  23 #
  24 # A python interface for various rfc822-like formatted files used by Debian
  25 # (.changes, .dsc, Packages, Sources, etc)
  26 #
  27 # Copyright (C) 2005-2006  dann frazier <dannf@dannf.org>
  28 # Copyright (C) 2006-2008  John Wright <john@johnwright.org>
  29 # Copyright (C) 2006       Adeodato Simó <dato@net.com.org.es>
  30 # Copyright (C) 2008       Stefano Zacchiroli <zack@upsilon.cc>
  31 #
  32 # This program is free software; you can redistribute it and/or
  33 # modify it under the terms of the GNU General Public License
  34 # as published by the Free Software Foundation, either version 2
  35 # of the License, or (at your option) any later version.
  36 #
  37 # This program is distributed in the hope that it will be useful,
  38 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  39 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  40 # GNU General Public License for more details.
  41 #
  42 # You should have received a copy of the GNU General Public License
  43 # along with this program; if not, write to the Free Software
  44 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  45
  46 def function_deprecated_by(x): x
  47
  48 try:
  49     import apt_pkg
  50     _have_apt_pkg = True
  51 except ImportError:
  52     _have_apt_pkg = False
  53
  54 import new
  55 import re
  56 import string
  57 import sys
  58 import StringIO
  59 import UserDict
  60
  61 class OrderedSet(object):
  62     """A set-like object that preserves order when iterating over it
  63
  64     We use this to keep track of keys in Deb822Dict, because it's much faster
  65     to look up if a key is in a set than in a list.
  66     """
  67
  68     def __init__(self, iterable=[]):
  69         self.__set = set()
  70         self.__order = []
  71         for item in iterable:
  72             self.add(item)
  73
  74     def add(self, item):
  75         if item not in self:
  76             # set.add will raise TypeError if something's unhashable, so we
  77             # don't have to handle that ourselves
  78             self.__set.add(item)
  79             self.__order.append(item)
  80
  81     def remove(self, item):
  82         # set.remove will raise KeyError, so we don't need to handle that
  83         # ourselves
  84         self.__set.remove(item)
  85         self.__order.remove(item)
  86
  87     def __iter__(self):
  88         # Return an iterator of items in the order they were added
  89         return iter(self.__order)
  90
  91     def __contains__(self, item):
  92         # This is what makes OrderedSet faster than using a list to keep track
  93         # of keys.  Lookup in a set is O(1) instead of O(n) for a list.
  94         return item in self.__set
  95
  96     ### list-like methods
  97     append = add
  98
  99     def extend(self, iterable):
 100         for item in iterable:
 101             self.add(item)
 102     ###
 103
 104 class Deb822Dict(object, UserDict.DictMixin):
 105     # Subclassing UserDict.DictMixin because we're overriding so much dict
 106     # functionality that subclassing dict requires overriding many more than
 107     # the four methods that DictMixin requires.
 108     """A dictionary-like object suitable for storing RFC822-like data.
 109
 110     Deb822Dict behaves like a normal dict, except:
 111         - key lookup is case-insensitive
 112         - key order is preserved
 113         - if initialized with a _parsed parameter, it will pull values from
 114           that dictionary-like object as needed (rather than making a copy).
 115           The _parsed dict is expected to be able to handle case-insensitive
 116           keys.
 117
 118     If _parsed is not None, an optional _fields parameter specifies which keys
 119     in the _parsed dictionary are exposed.
 120     """
 121
 122     # See the end of the file for the definition of _strI
 123
 124     def __init__(self, _dict=None, _parsed=None, _fields=None):
 125         self.__dict = {}
 126         self.__keys = OrderedSet()
 127         self.__parsed = None
 128
 129         if _dict is not None:
 130             # _dict may be a dict or a list of two-sized tuples
 131             if hasattr(_dict, 'items'):
 132                 items = _dict.items()
 133             else:
 134                 items = list(_dict)
 135
 136             try:
 137                 for k, v in items:
 138                     self[k] = v
 139             except ValueError:
 140                 this = len(self.__keys)
 141                 len_ = len(items[this])
 142                 raise ValueError('dictionary update sequence element #%d has '
 143                     'length %d; 2 is required' % (this, len_))
 144
 145         if _parsed is not None:
 146             self.__parsed = _parsed
 147             if _fields is None:
 148                 self.__keys.extend([ _strI(k) for k in self.__parsed.keys() ])
 149             else:
 150                 self.__keys.extend([ _strI(f) for f in _fields if self.__parsed.has_key(f) ])
 151
 152     ### BEGIN DictMixin methods
 153
 154     def __setitem__(self, key, value):
 155         key = _strI(key)
 156         self.__keys.add(key)
 157         self.__dict[key] = value
 158
 159     def __getitem__(self, key):
 160         key = _strI(key)
 161         try:
 162             return self.__dict[key]
 163         except KeyError:
 164             if self.__parsed is not None and key in self.__keys:
 165                 return self.__parsed[key]
 166             else:
 167                 raise
 168
 169     def __delitem__(self, key):
 170         key = _strI(key)
 171         self.__keys.remove(key)
 172         try:
 173             del self.__dict[key]
 174         except KeyError:
 175             # If we got this far, the key was in self.__keys, so it must have
 176             # only been in the self.__parsed dict.
 177             pass
 178
 179     def has_key(self, key):
 180         key = _strI(key)
 181         return key in self.__keys
 182
 183     def keys(self):
 184         return [str(key) for key in self.__keys]
 185
 186     ### END DictMixin methods
 187
 188     def __repr__(self):
 189         return '{%s}' % ', '.join(['%r: %r' % (k, v) for k, v in self.items()])
 190
 191     def __eq__(self, other):
 192         mykeys = self.keys(); mykeys.sort()
 193         otherkeys = other.keys(); otherkeys.sort()
 194         if not mykeys == otherkeys:
 195             return False
 196
 197         for key in mykeys:
 198             if self[key] != other[key]:
 199                 return False
 200
 201         # If we got here, everything matched
 202         return True
 203
 204     def copy(self):
 205         # Use self.__class__ so this works as expected for subclasses
 206         copy = self.__class__(self)
 207         return copy
 208
 209     # TODO implement __str__() and make dump() use that?
 210
 211
 212 class Deb822(Deb822Dict):
 213
 214     def __init__(self, sequence=None, fields=None, _parsed=None):
 215         """Create a new Deb822 instance.
 216
 217         :param sequence: a string, or any any object that returns a line of
 218             input each time, normally a file().  Alternately, sequence can
 219             be a dict that contains the initial key-value pairs.
 220
 221         :param fields: if given, it is interpreted as a list of fields that
 222             should be parsed (the rest will be discarded).
 223
 224         :param _parsed: internal parameter.
 225         """
 226
 227         if hasattr(sequence, 'items'):
 228             _dict = sequence
 229             sequence = None
 230         else:
 231             _dict = None
 232         Deb822Dict.__init__(self, _dict=_dict, _parsed=_parsed, _fields=fields)
 233
 234         if sequence is not None:
 235             try:
 236                 self._internal_parser(sequence, fields)
 237             except EOFError:
 238                 pass
 239
 240         self.gpg_info = None
 241
 242     def iter_paragraphs(cls, sequence, fields=None, use_apt_pkg=True,
 243                         shared_storage=False):
 244         """Generator that yields a Deb822 object for each paragraph in sequence.
 245
 246         :param sequence: same as in __init__.
 247
 248         :param fields: likewise.
 249
 250         :param use_apt_pkg: if sequence is a file(), apt_pkg will be used
 251             if available to parse the file, since it's much much faster.  Set
 252             this parameter to False to disable using apt_pkg.
 253         :param shared_storage: if sequence is a file(), use_apt_pkg is True,
 254             and shared_storage is True, yielded objects will share storage, so
 255             they can't be kept across iterations.  (Also, PGP signatures won't
 256             be stripped.)  By default, this parameter is False, causing a copy
 257             of the parsed data to be made through each iteration.  Except for
 258             with raw Deb822 paragraphs (as opposed to _multivalued subclasses),
 259             the speed gained by setting shared_storage=True is marginal.  This
 260             parameter has no effect if use_apt_pkg is False or apt_pkg is not
 261             available.
 262         """
 263
 264         if _have_apt_pkg and use_apt_pkg and isinstance(sequence, file):
 265             parser = apt_pkg.ParseTagFile(sequence)
 266             while parser.Step() == 1:
 267                 if shared_storage:
 268                     parsed = parser.Section
 269                 else:
 270                     # Since parser.Section doesn't have an items method, we
 271                     # need to imitate that method here and make a Deb822Dict
 272                     # from the result in order to preserve order.
 273                     items = [(key, parser.Section[key])
 274                              for key in parser.Section.keys()]
 275                     parsed = Deb822Dict(items)
 276                 yield cls(fields=fields, _parsed=parsed)
 277
 278         else:
 279             iterable = iter(sequence)
 280             x = cls(iterable, fields)
 281             while len(x) != 0:
 282                 yield x
 283                 x = cls(iterable, fields)
 284
 285     iter_paragraphs = classmethod(iter_paragraphs)
 286
 287     ###
 288
 289     def _internal_parser(self, sequence, fields=None):
 290         single = re.compile("^(?P<key>\S+)\s*:\s*(?P<data>\S.*?)\s*$")
 291         multi = re.compile("^(?P<key>\S+)\s*:\s*$")
 292         multidata = re.compile("^\s(?P<data>.+?)\s*$")
 293
 294         wanted_field = lambda f: fields is None or f in fields
 295
 296         if isinstance(sequence, basestring):
 297             sequence = sequence.splitlines()
 298
 299         curkey = None
 300         content = ""
 301         for line in self.gpg_stripped_paragraph(sequence):
 302             m = single.match(line)
 303             if m:
 304                 if curkey:
 305                     self[curkey] += content
 306
 307                 if not wanted_field(m.group('key')):
 308                     curkey = None
 309                     continue
 310
 311                 curkey = m.group('key')
 312                 self[curkey] = m.group('data')
 313                 content = ""
 314                 continue
 315
 316             m = multi.match(line)
 317             if m:
 318                 if curkey:
 319                     self[curkey] += content
 320
 321                 if not wanted_field(m.group('key')):
 322                     curkey = None
 323                     continue
 324
 325                 curkey = m.group('key')
 326                 self[curkey] = ""
 327                 content = ""
 328                 continue
 329
 330             m = multidata.match(line)
 331             if m:
 332                 content += '\n' + line # XXX not m.group('data')?
 333                 continue
 334
 335         if curkey:
 336             self[curkey] += content
 337
 338     def __str__(self):
 339         return self.dump()
 340
 341     # __repr__ is handled by Deb822Dict
 342
 343     def dump(self, fd=None):
 344         """Dump the the contents in the original format
 345
 346         If fd is None, return a string.
 347         """
 348
 349         if fd is None:
 350             fd = StringIO.StringIO()
 351             return_string = True
 352         else:
 353             return_string = False
 354         for key, value in self.iteritems():
 355             if not value or value[0] == '\n':
 356                 # Avoid trailing whitespace after "Field:" if it's on its own
 357                 # line or the value is empty
 358                 # XXX Uh, really print value if value == '\n'?
 359                 fd.write('%s:%s\n' % (key, value))
 360             else:
 361                 fd.write('%s: %s\n' % (key, value))
 362         if return_string:
 363             return fd.getvalue()
 364
 365     ###
 366
 367     def is_single_line(self, s):
 368         if s.count("\n"):
 369             return False
 370         else:
 371             return True
 372
 373     isSingleLine = function_deprecated_by(is_single_line)
 374
 375     def is_multi_line(self, s):
 376         return not self.is_single_line(s)
 377
 378     isMultiLine = function_deprecated_by(is_multi_line)
 379
 380     def _merge_fields(self, s1, s2):
 381         if not s2:
 382             return s1
 383         if not s1:
 384             return s2
 385
 386         if self.is_single_line(s1) and self.is_single_line(s2):
 387             ## some fields are delimited by a single space, others
 388             ## a comma followed by a space.  this heuristic assumes
 389             ## that there are multiple items in one of the string fields
 390             ## so that we can pick up on the delimiter being used
 391             delim = ' '
 392             if (s1 + s2).count(', '):
 393                 delim = ', '
 394
 395             L = (s1 + delim + s2).split(delim)
 396             L.sort()
 397
 398             prev = merged = L[0]
 399
 400             for item in L[1:]:
 401                 ## skip duplicate entries
 402                 if item == prev:
 403                     continue
 404                 merged = merged + delim + item
 405                 prev = item
 406             return merged
 407
 408         if self.is_multi_line(s1) and self.is_multi_line(s2):
 409             for item in s2.splitlines(True):
 410                 if item not in s1.splitlines(True):
 411                     s1 = s1 + "\n" + item
 412             return s1
 413
 414         raise ValueError
 415
 416     _mergeFields = function_deprecated_by(_merge_fields)
 417
 418     def merge_fields(self, key, d1, d2=None):
 419         ## this method can work in two ways - abstract that away
 420         if d2 == None:
 421             x1 = self
 422             x2 = d1
 423         else:
 424             x1 = d1
 425             x2 = d2
 426
 427         ## we only have to do work if both objects contain our key
 428         ## otherwise, we just take the one that does, or raise an
 429         ## exception if neither does
 430         if key in x1 and key in x2:
 431             merged = self._mergeFields(x1[key], x2[key])
 432         elif key in x1:
 433             merged = x1[key]
 434         elif key in x2:
 435             merged = x2[key]
 436         else:
 437             raise KeyError
 438
 439         ## back to the two different ways - if this method was called
 440         ## upon an object, update that object in place.
 441         ## return nothing in this case, to make the author notice a
 442         ## problem if she assumes the object itself will not be modified
 443         if d2 == None:
 444             self[key] = merged
 445             return None
 446
 447         return merged
 448
 449     mergeFields = function_deprecated_by(merge_fields)
 450
 451     def split_gpg_and_payload(sequence):
 452         """Return a (gpg_pre, payload, gpg_post) tuple
 453
 454         Each element of the returned tuple is a list of lines (with trailing
 455         whitespace stripped).
 456         """
 457
 458         gpg_pre_lines = []
 459         lines = []
 460         gpg_post_lines = []
 461         state = 'SAFE'
 462         gpgre = re.compile(r'^-----(?P<action>BEGIN|END) PGP (?P<what>[^-]+)-----$')
 463         blank_line = re.compile('^$')
 464         first_line = True
 465
 466         for line in sequence:
 467             line = line.strip('\r\n')
 468
 469             # skip initial blank lines, if any
 470             if first_line:
 471                 if blank_line.match(line):
 472                     continue
 473                 else:
 474                     first_line = False
 475
 476             m = gpgre.match(line)
 477
 478             if not m:
 479                 if state == 'SAFE':
 480                     if not blank_line.match(line):
 481                         lines.append(line)
 482                     else:
 483                         if not gpg_pre_lines:
 484                             # There's no gpg signature, so we should stop at
 485                             # this blank line
 486                             break
 487                 elif state == 'SIGNED MESSAGE':
 488                     if blank_line.match(line):
 489                         state = 'SAFE'
 490                     else:
 491                         gpg_pre_lines.append(line)
 492                 elif state == 'SIGNATURE':
 493                     gpg_post_lines.append(line)
 494             else:
 495                 if m.group('action') == 'BEGIN':
 496                     state = m.group('what')
 497                 elif m.group('action') == 'END':
 498                     gpg_post_lines.append(line)
 499                     break
 500                 if not blank_line.match(line):
 501                     if not lines:
 502                         gpg_pre_lines.append(line)
 503                     else:
 504                         gpg_post_lines.append(line)
 505
 506         if len(lines):
 507             return (gpg_pre_lines, lines, gpg_post_lines)
 508         else:
 509             raise EOFError('only blank lines found in input')
 510
 511     split_gpg_and_payload = staticmethod(split_gpg_and_payload)
 512
 513     def gpg_stripped_paragraph(cls, sequence):
 514         return cls.split_gpg_and_payload(sequence)[1]
 515
 516     gpg_stripped_paragraph = classmethod(gpg_stripped_paragraph)
 517
 518     def get_gpg_info(self):
 519         """Return a GpgInfo object with GPG signature information
 520
 521         This method will raise ValueError if the signature is not available
 522         (e.g. the original text cannot be found)"""
 523
 524         # raw_text is saved (as a string) only for Changes and Dsc (see
 525         # _gpg_multivalued.__init__) which is small compared to Packages or
 526         # Sources which contain no signature
 527         if not hasattr(self, 'raw_text'):
 528             raise ValueError, "original text cannot be found"
 529
 530         if self.gpg_info is None:
 531             self.gpg_info = GpgInfo.from_sequence(self.raw_text)
 532
 533         return self.gpg_info
 534
 535 ###
 536
# XXX check what happens if input contains more than one signature
 538 class GpgInfo(dict):
 539     """A wrapper around gnupg parsable output obtained via --status-fd
 540
 541     This class is really a dictionary containing parsed output from gnupg plus
 542     some methods to make sense of the data.
 543     Keys are keywords and values are arguments suitably splitted.
 544     See /usr/share/doc/gnupg/DETAILS.gz"""
 545
 546     # keys with format "key keyid uid"
 547     uidkeys = ('GOODSIG', 'EXPSIG', 'EXPKEYSIG', 'REVKEYSIG', 'BADSIG')
 548
 549     def valid(self):
 550         """Is the signature valid?"""
 551         return self.has_key('GOODSIG') or self.has_key('VALIDSIG')
 552
 553 # XXX implement as a property?
 554 # XXX handle utf-8 %-encoding
 555     def uid(self):
 556         """Return the primary ID of the signee key, None is not available"""
 557         pass
 558
 559     @staticmethod
 560     def from_output(out, err=None):
 561         """Create a new GpgInfo object from gpg(v) --status-fd output (out) and
 562         optionally collect stderr as well (err).
 563
 564         Both out and err can be lines in newline-terminated sequence or regular strings."""
 565
 566         n = GpgInfo()
 567
 568         if isinstance(out, basestring):
 569             out = out.split('\n')
 570         if isinstance(err, basestring):
 571             err = err.split('\n')
 572
 573         n.out = out
 574         n.err = err
 575
 576         header = '[GNUPG:] '
 577         for l in out:
 578             if not l.startswith(header):
 579                 continue
 580
 581             l = l[len(header):]
 582             l = l.strip('\n')
 583
 584             # str.partition() would be better, 2.5 only though
 585             s = l.find(' ')
 586             key = l[:s]
 587             if key in GpgInfo.uidkeys:
 588                 # value is "keyid UID", don't split UID
 589                 value = l[s+1:].split(' ', 1)
 590             else:
 591                 value = l[s+1:].split(' ')
 592
 593             n[key] = value
 594         return n
 595
 596 # XXX how to handle sequences of lines? file() returns \n-terminated
 597     @staticmethod
 598     def from_sequence(sequence, keyrings=['/usr/share/keyrings/debian-keyring.gpg'],
 599             executable=["/usr/bin/gpgv"]):
 600         """Create a new GpgInfo object from the given sequence.
 601
 602         Sequence is a sequence of lines or a string
 603         executable is a list of args for subprocess.Popen, the first element being the gpg executable"""
 604
 605         # XXX check for gpg as well and use --verify accordingly?
 606         args = executable
 607         #args.extend(["--status-fd", "1", "--no-default-keyring"])
 608         args.extend(["--status-fd", "1"])
 609         import os
 610         [args.extend(["--keyring", k]) for k in keyrings if os.path.isfile(k) and os.access(k, os.R_OK)]
 611
 612         if "--keyring" not in args:
 613             raise IOError, "cannot access none of given keyrings"
 614
 615         import subprocess
 616         p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 617         # XXX what to do with exit code?
 618
 619         if isinstance(sequence, basestring):
 620             (out, err) = p.communicate(sequence)
 621         else:
 622             (out, err) = p.communicate("\n".join(sequence))
 623
 624         return GpgInfo.from_output(out, err)
 625
 626     @staticmethod
 627     def from_file(target, *args):
 628         """Create a new GpgInfo object from the given file, calls from_sequence(file(target), *args)"""
 629         return from_sequence(file(target), *args)
 630
 631 ###
 632
 633 class PkgRelation(object):
 634     """Inter-package relationships
 635
 636     Structured representation of the relationships of a package to another,
 637     i.e. of what can appear in a Deb882 field like Depends, Recommends,
 638     Suggests, ... (see Debian Policy 7.1).
 639     """
 640
 641     # XXX *NOT* a real dependency parser, and that is not even a goal here, we
 642     # just parse as much as we need to split the various parts composing a
 643     # dependency, checking their correctness wrt policy is out of scope
 644     __dep_RE = re.compile( \
 645             r'^\s*(?P<name>[a-zA-Z0-9.+\-]{2,})(\s*\(\s*(?P<relop>[>=<]+)\s*(?P<version>[0-9a-zA-Z:\-+~.]+)\s*\))?(\s*\[(?P<archs>[\s!\w\-]+)\])?\s*$')
 646     __comma_sep_RE = re.compile(r'\s*,\s*')
 647     __pipe_sep_RE = re.compile(r'\s*\|\s*')
 648     __blank_sep_RE = re.compile(r'\s*')
 649
 650     @classmethod
 651     def parse_relations(cls, raw):
 652         """Parse a package relationship string (i.e. the value of a field like
 653         Depends, Recommends, Build-Depends ...)
 654         """
 655         def parse_archs(raw):
 656             # assumption: no space beween '!' and architecture name
 657             archs = []
 658             for arch in cls.__blank_sep_RE.split(raw.strip()):
 659                 if len(arch) and arch[0] == '!':
 660                     archs.append((False, arch[1:]))
 661                 else:
 662                     archs.append((True, arch))
 663             return archs
 664
 665         def parse_rel(raw):
 666             match = cls.__dep_RE.match(raw)
 667             if match:
 668                 parts = match.groupdict()
 669                 d = { 'name': parts['name'] }
 670                 if not (parts['relop'] is None or parts['version'] is None):
 671                     d['version'] = (parts['relop'], parts['version'])
 672                 else:
 673                     d['version'] = None
 674                 if parts['archs'] is None:
 675                     d['arch'] = None
 676                 else:
 677                     d['arch'] = parse_archs(parts['archs'])
 678                 return d
 679             else:
 680                 print >> sys.stderr, \
 681                         'deb822.py: WARNING: cannot parse package' \
 682                         ' relationship "%s", returning it raw' % raw
 683                 return { 'name': raw, 'version': None, 'arch': None }
 684
 685         tl_deps = cls.__comma_sep_RE.split(raw.strip()) # top-level deps
 686         cnf = map(cls.__pipe_sep_RE.split, tl_deps)
 687         return map(lambda or_deps: map(parse_rel, or_deps), cnf)
 688
 689     @staticmethod
 690     def str(rels):
 691         """Format to string structured inter-package relationships
 692
 693         Perform the inverse operation of parse_relations, returning a string
 694         suitable to be written in a package stanza.
 695         """
 696         def pp_arch(arch_spec):
 697             (excl, arch) = arch_spec
 698             if excl:
 699                 return arch
 700             else:
 701                 return '!' + arch
 702
 703         def pp_atomic_dep(dep):
 704             s = dep['name']
 705             if dep.has_key('version') and dep['version'] is not None:
 706                 s += ' (%s %s)' % dep['version']
 707             if dep.has_key('arch') and dep['arch'] is not None:
 708                 s += ' [%s]' % string.join(map(pp_arch, dep['arch']))
 709             return s
 710
 711         pp_or_dep = lambda deps: string.join(map(pp_atomic_dep, deps), ' | ')
 712         return string.join(map(pp_or_dep, rels), ', ')
 713
 714
 715 class _lowercase_dict(dict):
 716     """Dictionary wrapper which lowercase keys upon lookup."""
 717
 718     def __getitem__(self, key):
 719         return dict.__getitem__(self, key.lower())
 720
 721
 722 class _PkgRelationMixin(object):
 723     """Package relationship mixin
 724
 725     Inheriting from this mixin you can extend a Deb882 object with attributes
 726     letting you access inter-package relationship in a structured way, rather
 727     than as strings. For example, while you can usually use pkg['depends'] to
 728     obtain the Depends string of package pkg, mixing in with this class you
 729     gain pkg.depends to access Depends as a Pkgrel instance
 730
 731     To use, subclass _PkgRelationMixin from a class with a _relationship_fields
 732     attribute. It should be a list of field names for which structured access
 733     is desired; for each of them a method wild be added to the inherited class.
 734     The method name will be the lowercase version of field name; '-' will be
 735     mangled as '_'. The method would return relationships in the same format of
 736     the PkgRelation' relations property.
 737
 738     See Packages and Sources as examples.
 739     """
 740
 741     def __init__(self, *args, **kwargs):
 742         self.__relations = _lowercase_dict({})
 743         self.__parsed_relations = False
 744         for name in self._relationship_fields:
 745             # To avoid reimplementing Deb822 key lookup logic we use a really
 746             # simple dict subclass which just lowercase keys upon lookup. Since
 747             # dictionary building happens only here, we ensure that all keys
 748             # are in fact lowercase.
 749             # With this trick we enable users to use the same key (i.e. field
 750             # name) of Deb822 objects on the dictionary returned by the
 751             # relations property.
 752             keyname = name.lower()
 753             if self.has_key(name):
 754                 self.__relations[keyname] = None   # lazy value
 755                     # all lazy values will be expanded before setting
 756                     # __parsed_relations to True
 757             else:
 758                 self.__relations[keyname] = []
 759
 760     @property
 761     def relations(self):
 762         """Return a dictionary of inter-package relationships among the current
 763         and other packages.
 764
 765         Dictionary keys depend on the package kind. Binary packages have keys
 766         like 'depends', 'recommends', ... while source packages have keys like
 767         'build-depends', 'build-depends-indep' and so on. See the Debian policy
 768         for the comprehensive field list.
 769
 770         Dictionary values are package relationships returned as lists of lists
 771         of dictionaries (see below for some examples).
 772
 773         The encoding of package relationships is as follows:
 774         - the top-level lists corresponds to the comma-separated list of
 775           Deb822, their components form a conjuction, i.e. they have to be
 776           AND-ed together
 777         - the inner lists corresponds to the pipe-separated list of Deb822,
 778           their components form a disjunction, i.e. they have to be OR-ed
 779           together
 780         - member of the inner lists are dictionaries with the following keys:
 781           - name:       package (or virtual package) name
 782           - version:    A pair <operator, version> if the relationship is
 783                         versioned, None otherwise. operator is one of "<<",
 784                         "<=", "=", ">=", ">>"; version is the given version as
 785                         a string.
 786           - arch:       A list of pairs <polarity, architecture> if the
 787                         relationship is architecture specific, None otherwise.
 788                         Polarity is a boolean (false if the architecture is
 789                         negated with "!", true otherwise), architecture the
 790                         Debian archtiecture name as a string.
 791
 792         Examples:
 793
 794           "emacs | emacsen, make, debianutils (>= 1.7)"     becomes
 795           [ [ {'name': 'emacs'}, {'name': 'emacsen'} ],
 796             [ {'name': 'make'} ],
 797             [ {'name': 'debianutils', 'version': ('>=', '1.7')} ] ]
 798
 799           "tcl8.4-dev, procps [!hurd-i386]"                 becomes
 800           [ [ {'name': 'tcl8.4-dev'} ],
 801             [ {'name': 'procps', 'arch': (false, 'hurd-i386')} ] ]
 802         """
 803         if not self.__parsed_relations:
 804             lazy_rels = filter(lambda n: self.__relations[n] is None,
 805                     self.__relations.keys())
 806             for n in lazy_rels:
 807                 self.__relations[n] = PkgRelation.parse_relations(self[n])
 808             self.__parsed_relations = True
 809         return self.__relations
 810
 811 class _multivalued(Deb822):
 812     """A class with (R/W) support for multivalued fields.
 813
 814     To use, create a subclass with a _multivalued_fields attribute.  It should
 815     be a dictionary with *lower-case* keys, with lists of human-readable
 816     identifiers of the fields as the values.  Please see Dsc, Changes, and
 817     PdiffIndex as examples.
 818     """
 819
 820     def __init__(self, *args, **kwargs):
 821         Deb822.__init__(self, *args, **kwargs)
 822
 823         for field, fields in self._multivalued_fields.items():
 824             try:
 825                 contents = self[field]
 826             except KeyError:
 827                 continue
 828
 829             if self.is_multi_line(contents):
 830                 self[field] = []
 831                 updater_method = self[field].append
 832             else:
 833                 self[field] = Deb822Dict()
 834                 updater_method = self[field].update
 835
 836             for line in filter(None, contents.splitlines()):
 837                 updater_method(Deb822Dict(zip(fields, line.split())))
 838
 839     def dump(self, fd=None):
 840         """Dump the contents in the original format
 841
 842         If fd is None, return a string.
 843         """
 844
 845         if fd is None:
 846             fd = StringIO.StringIO()
 847             return_string = True
 848         else:
 849             return_string = False
 850         for key in self.keys():
 851             keyl = key.lower()
 852             if keyl not in self._multivalued_fields:
 853                 value = self[key]
 854                 if not value or value[0] == '\n':
 855                     # XXX Uh, really print value if value == '\n'?
 856                     fd.write('%s:%s\n' % (key, value))
 857                 else:
 858                     fd.write('%s: %s\n' % (key, value))
 859             else:
 860                 fd.write(key + ":")
 861                 if hasattr(self[key], 'keys'): # single-line
 862                     array = [ self[key] ]
 863                 else: # multi-line
 864                     fd.write("\n")
 865                     array = self[key]
 866
 867                 order = self._multivalued_fields[keyl]
 868                 try:
 869                     field_lengths = self._fixed_field_lengths
 870                 except AttributeError:
 871                     field_lengths = {}
 872                 for item in array:
 873                     for x in order:
 874                         raw_value = str(item[x])
 875                         try:
 876                             length = field_lengths[keyl][x]
 877                         except KeyError:
 878                             value = raw_value
 879                         else:
 880                             value = (length - len(raw_value)) * " " + raw_value
 881                         fd.write(" %s" % value)
 882                     fd.write("\n")
 883         if return_string:
 884             return fd.getvalue()
 885
 886
 887 ###
 888
 889
 890 class _gpg_multivalued(_multivalued):
 891     """A _multivalued class that can support gpg signed objects
 892
 893     This class's feature is that it stores the raw text before parsing so that
 894     gpg can verify the signature.  Use it just like you would use the
 895     _multivalued class.
 896
 897     This class only stores raw text if it is given a raw string, or if it
 898     detects a gpg signature when given a file or sequence of lines (see
 899     Deb822.split_gpg_and_payload for details).
 900     """
 901
 902     def __init__(self, *args, **kwargs):
 903         try:
 904             sequence = args[0]
 905         except IndexError:
 906             sequence = kwargs.get("sequence", None)
 907
 908         if sequence is not None:
 909             if isinstance(sequence, basestring):
 910                 self.raw_text = sequence
 911             elif hasattr(sequence, "items"):
 912                 # sequence is actually a dict(-like) object, so we don't have
 913                 # the raw text.
 914                 pass
 915             else:
 916                 try:
 917                     gpg_pre_lines, lines, gpg_post_lines = \
 918                             self.split_gpg_and_payload(sequence)
 919                 except EOFError:
 920                     # Empty input
 921                     gpg_pre_lines = lines = gpg_post_lines = []
 922                 if gpg_pre_lines and gpg_post_lines:
 923                     raw_text = StringIO.StringIO()
 924                     raw_text.write("\n".join(gpg_pre_lines))
 925                     raw_text.write("\n\n")
 926                     raw_text.write("\n".join(lines))
 927                     raw_text.write("\n\n")
 928                     raw_text.write("\n".join(gpg_post_lines))
 929                     self.raw_text = raw_text.getvalue()
 930                 try:
 931                     args = list(args)
 932                     args[0] = lines
 933                 except IndexError:
 934                     kwargs["sequence"] = lines
 935
 936         _multivalued.__init__(self, *args, **kwargs)
 937
 938
class Dsc(_gpg_multivalued):
    """gpg-aware multivalued paragraph with file and checksum lists.

    Presumably models a Debian .dsc file (inferred from the class name).
    """

    # Multivalued fields, keyed by lower-case field name.  Each value names,
    # in order, the whitespace-separated columns of one line of the field.
    _multivalued_fields = {
        "files": [ "md5sum", "size", "name" ],
        "checksums-sha1": ["sha1", "size", "name"],
        "checksums-sha256": ["sha256", "size", "name"],
    }
 945
 946
 947 class Changes(_gpg_multivalued):
 948     _multivalued_fields = {
 949         "files": [ "md5sum", "size", "section", "priority", "name" ],
 950         "checksums-sha1": ["sha1", "size", "name"],
 951         "checksums-sha256": ["sha256", "size", "name"],
 952     }
 953
 954     def get_pool_path(self):
 955         """Return the path in the pool where the files would be installed"""
 956
 957         # This is based on the section listed for the first file.  While
 958         # it is possible, I think, for a package to provide files in multiple
 959         # sections, I haven't seen it in practice.  In any case, this should
 960         # probably detect such a situation and complain, or return a list...
 961
 962         s = self['files'][0]['section']
 963
 964         try:
 965             section, subsection = s.split('/')
 966         except ValueError:
 967             # main is implicit
 968             section = 'main'
 969
 970         if self['source'].startswith('lib'):
 971             subdir = self['source'][:4]
 972         else:
 973             subdir = self['source'][0]
 974
 975         return 'pool/%s/%s/%s' % (section, subdir, self['source'])
 976
 977
 978 class PdiffIndex(_multivalued):
 979     _multivalued_fields = {
 980         "sha1-current": [ "SHA1", "size" ],
 981         "sha1-history": [ "SHA1", "size", "date" ],
 982         "sha1-patches": [ "SHA1", "size", "date" ],
 983     }
 984
 985     @property
 986     def _fixed_field_lengths(self):
 987         fixed_field_lengths = {}
 988         for key in self._multivalued_fields:
 989             if hasattr(self[key], 'keys'):
 990                 # Not multi-line -- don't need to compute the field length for
 991                 # this one
 992                 continue
 993             length = self._get_size_field_length(key)
 994             fixed_field_lengths[key] = {"size": length}
 995         return fixed_field_lengths
 996
 997     def _get_size_field_length(self, key):
 998         lengths = [len(str(item['size'])) for item in self[key]]
 999         return max(lengths)
1000
1001
class Release(_multivalued):
    """Represents a Release file

    Set the size_field_behavior attribute to "dak" to make the size field
    length only as long as the longest actual value.  The default,
    "apt-ftparchive" makes the field 16 characters long regardless.
    """
    # FIXME: Add support for detecting the behavior of the input, if
    # constructed from actual 822 text.

    # Lower-case field name -> ordered column names of one line of the field.
    _multivalued_fields = {
        "md5sum": [ "md5sum", "size", "name" ],
        "sha1": [ "sha1", "size", "name" ],
        "sha256": [ "sha256", "size", "name" ],
    }

    __size_field_behavior = "apt-ftparchive"
    def set_size_field_behavior(self, value):
        """Select how the size column is padded by dump().

        value must be "apt-ftparchive" or "dak"; anything else raises
        ValueError.
        """
        if value not in ["apt-ftparchive", "dak"]:
            raise ValueError("size_field_behavior must be either "
                             "'apt-ftparchive' or 'dak'")
        else:
            self.__size_field_behavior = value
    size_field_behavior = property(lambda self: self.__size_field_behavior,
                                   set_size_field_behavior)

    @property
    def _fixed_field_lengths(self):
        """Column widths used by dump() to right-align the size column.

        Returns a dictionary mapping multivalued field names to
        {"size": width}.  Fields for which no width applies (see
        _get_size_field_length) are left out, so that dump() writes them
        unpadded instead of failing.
        """
        fixed_field_lengths = {}
        for key in self._multivalued_fields:
            length = self._get_size_field_length(key)
            if length is not None:
                fixed_field_lengths[key] = {"size": length}
        return fixed_field_lengths

    def _get_size_field_length(self, key):
        """Return the width of the size column of field key.

        With the "apt-ftparchive" behavior the width is always 16.  With the
        "dak" behavior it is the length of the longest size value of the
        field, or None when the field is absent, is not multi-line, or has
        no entries.
        """
        if self.size_field_behavior == "apt-ftparchive":
            return 16
        elif self.size_field_behavior == "dak":
            try:
                entries = self[key]
            except KeyError:
                # A Release paragraph need not carry every checksum field.
                return None
            if hasattr(entries, 'keys'):
                # Not multi-line: a single entry needs no alignment.
                return None
            lengths = [len(str(item['size'])) for item in entries]
            if not lengths:
                return None
            return max(lengths)
1042
1043
class Sources(Dsc, _PkgRelationMixin):
    """Represent an APT source package list"""

    # Fields of a source paragraph that hold inter-package relationships;
    # read by _PkgRelationMixin (NOTE(review): its __init__ is outside this
    # view -- confirm how the list is consumed).
    _relationship_fields = [ 'build-depends', 'build-depends-indep',
            'build-conflicts', 'build-conflicts-indep', 'binary' ]

    def __init__(self, *args, **kwargs):
        # Both bases are initialised explicitly with the same arguments:
        # first the Dsc side, which parses the paragraph, then the
        # relationship mixin.
        Dsc.__init__(self, *args, **kwargs)
        _PkgRelationMixin.__init__(self, *args, **kwargs)
1053
1054
class Packages(Deb822, _PkgRelationMixin):
    """Represent an APT binary package list"""

    # Fields of a binary paragraph that hold inter-package relationships;
    # read by _PkgRelationMixin (NOTE(review): its __init__ is outside this
    # view -- confirm how the list is consumed).
    _relationship_fields = [ 'depends', 'pre-depends', 'recommends',
            'suggests', 'breaks', 'conflicts', 'provides', 'replaces',
            'enhances' ]

    def __init__(self, *args, **kwargs):
        # Both bases are initialised explicitly with the same arguments:
        # first the Deb822 side, which parses the paragraph, then the
        # relationship mixin.
        Deb822.__init__(self, *args, **kwargs)
        _PkgRelationMixin.__init__(self, *args, **kwargs)
1065
1066 ###
1067
1068 class _CaseInsensitiveString(str):
1069     """Case insensitive string.
1070     """
1071
1072     def __new__(cls, str_):
1073         s = str.__new__(cls, str_)
1074         s.str_lower = str_.lower()
1075         s.str_lower_hash = hash(s.str_lower)
1076         return s
1077
1078     def __hash__(self):
1079         return self.str_lower_hash
1080
1081     def __eq__(self, other):
1082         return self.str_lower == other.lower()
1083
1084     def lower(self):
1085         return self.str_lower
1086
1087 _strI = _CaseInsensitiveString
1088
# Command line handling: one option (-p/--prefix) and exactly two positional
# arguments, the Sources file to read and the target architecture.
usage = 'Usage: cat Packages | add-sources [OPTION...] Sources ARCH > Packages.new'
cli = OptionParser(usage=usage)
cli.add_option('-p', '--prefix', dest='prefix', default='source---',
        help='set the prefix for fake source packages to PREFIX (default: source---)',
        metavar='PREFIX')
(options, args) = cli.parse_args()
if len(args) != 2:
    # Standard output is where the new Packages file is produced (see the
    # usage line), so the help shown on a usage error goes to standard error.
    cli.print_help(sys.stderr)
    sys.exit(2)
sources_file = args[0]
architecture = args[1]
1100
def pkg_of_src(src):
    """Build the fake binary package standing in for source package src.

    The result is a Packages paragraph named options.prefix followed by the
    source package name, with Architecture 'all'.  Version, Priority, Section
    and Maintainer are copied from src when present.  Its Depends are the
    build-depends and build-depends-indep of src, its Conflicts the
    build-conflicts and build-conflicts-indep; in both cases only the
    alternatives that apply to the global architecture are kept, and their
    architecture restriction is removed.

    Side effect: the relationship dictionaries of src.relations that are
    kept have their 'arch' entry reset to None in place.
    """
    fake = Packages()
    fake['Package'] = options.prefix + src['Package']

    def applies_here(dep):
        # Does this atomic dependency concern the target architecture?
        arch_spec = dep['arch']
        if arch_spec is None:
            # no architecture restriction at all
            return True
        if not arch_spec:
            # empty restriction list
            return False
        # The polarity of the first entry decides how the list is read.
        (polarity, _) = arch_spec[0]
        if polarity:
            # list is inclusive
            return (True, architecture) in arch_spec
        # list is exclusive
        return (False, architecture) not in arch_spec

    def binary_relations(fields, relations):
        # Gather the relations of all the requested fields that exist ...
        collected = []
        for name in fields:
            if relations.has_key(name):
                collected.extend(relations[name])

        # ... then keep, for each OR-group, the alternatives relevant to
        # the target architecture, dropping groups left empty.
        result = []
        for alternatives in collected:
            kept = [dep for dep in alternatives if applies_here(dep)]
            for dep in kept:
                dep['arch'] = None
            if kept:
                result.append(kept)
        return result

    def render_relations(rels):
        # XXX this duplicates python-debian's deb822.py, more precisely the
        # str() method of the PkgRelation class
        # TODO to be removed as soon as python-debian 0.1.12 hits unstable
        def render_arch(arch_spec):
            (positive, arch) = arch_spec
            if not positive:
                return '!' + arch
            return arch

        def render_dep(dep):
            text = dep['name']
            if dep.has_key('version') and dep['version'] is not None:
                text += ' (%s %s)' % dep['version']
            if dep.has_key('arch') and dep['arch'] is not None:
                text += ' [%s]' % ' '.join(
                        [render_arch(spec) for spec in dep['arch']])
            return text

        groups = []
        for alternatives in rels:
            groups.append(' | '.join(
                    [render_dep(dep) for dep in alternatives]))
        return ', '.join(groups)

    for field in ['Version', 'Priority', 'Section', 'Maintainer']:
        if src.has_key(field):
            fake[field] = src[field]

    src_relations = src.relations
    for target, source_fields in (
            ('Depends', ['build-depends', 'build-depends-indep']),
            ('Conflicts', ['build-conflicts', 'build-conflicts-indep'])):
        rels = binary_relations(source_fields, src_relations)
        if rels:
            fake[target] = render_relations(rels)

    fake['Description'] = 'dummy counterpart of "%s" source package' % \
            src['Package']
    fake['Description'] += "\n I don't exist, go away."
    fake['Architecture'] = 'all'

    return fake
1180
# Copy the binary Packages paragraphs from standard input through unchanged,
# followed by a blank line separating them from the generated paragraphs.
for line in sys.stdin:
    sys.stdout.write(line)
sys.stdout.write("\n")

# Append one fake package for each source package that can be built on the
# requested architecture.
sources_fd = open(sources_file)
try:
    for src in Sources.iter_paragraphs(sources_fd):
        # The Architecture field of a source package is a space-separated
        # list, and "any all" is a legal value: test the individual items
        # rather than the whole string, so that such packages are not
        # skipped.
        # NOTE(review): architecture wildcards such as "linux-any" are not
        # matched here.
        src_archs = src['Architecture'].split()
        if 'any' in src_archs or 'all' in src_archs \
                or architecture in src_archs:
            pkg = pkg_of_src(src)
            sys.stdout.write(str(pkg))
            sys.stdout.write("\n")
finally:
    sources_fd.close()
1190