"Generate package <-> file mapping"),
("generate-releases",
"Generate Release files"),
+ ("generate-contents",
+ "Generate contents files"),
("generate-index-diffs",
"Generate .diff/Index files"),
("clean-suites",
"Check for users with no packages in the archive"),
("import-archive",
"Populate SQL database based from an archive tree"),
+ ("import-contents",
+ "Populate SQL database with Contents files"),
("import-keyring",
"Populate fingerprint/uid table based on a new/updated keyring"),
("import-ldap-fingerprints",
################################################################################
def do_update(self):
+vvvvvvvvvvvvvvvvvvvv
print "Note: to be able to enable the PL/Perl (plperl) procedural language, we do"
print "need postgresql-plperl-$postgres-version installed. Make sure that this is the"
print "case before you continue. Interrupt if it isn't, sleeping 5 seconds now."
print "(We need to be database superuser for this to work!)"
time.sleep (5)
+^^^^^^^^^^^^^^^^^^^^
try:
c = self.db.cursor()
--- /dev/null
+#!/usr/bin/env python
+# coding=utf8
+
+"""
+Debian Archive Kit Database Update Script
+Copyright © 2008 Michael Casadevall <mcasadevall@debian.org>
+Copyright © 2008 Roger Leigh <rleigh@debian.org>
+
+Debian Archive Kit Database Update Script 2
+"""
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+# <tomv_w> really, if we want to screw ourselves, let's find a better way.
+# <Ganneff> rm -rf /srv/ftp.debian.org
+
+################################################################################
+
+import psycopg2, time
+
+################################################################################
+
+def do_update(self):
+ print "Adding content fields to database"
+
+ try:
+ c = self.db.cursor()
+ c.execute("""CREATE TABLE content_file_paths (
+ id serial primary key not null,
+ path text unique not null
+ )""")
+
+ c.execute("""CREATE TABLE content_file_names (
+ id serial primary key not null,
+ file text unique not null
+ )""")
+
+ c.execute("""CREATE TABLE content_associations (
+ id serial not null,
+ binary_pkg int4 not null references binaries(id) on delete cascade,
+ filepath int4 not null references content_file_paths(id) on delete cascade,
+ filename int4 not null references content_file_names(id) on delete cascade
+ );""")
+
+ c.execute("""CREATE FUNCTION comma_concat(text, text) RETURNS text
+ AS $_$select case
+ WHEN $2 is null or $2 = '' THEN $1
+ WHEN $1 is null or $1 = '' THEN $2
+ ELSE $1 || ',' || $2
+ END$_$
+ LANGUAGE sql""")
+
+ c.execute("""CREATE AGGREGATE comma_separated_list (
+ BASETYPE = text,
+ SFUNC = comma_concat,
+ STYPE = text,
+ INITCOND = ''
+ );""")
+
+ c.execute("UPDATE config SET value = '2' WHERE name = 'db_revision'")
+ self.db.commit()
+
+ print "REMINDER: Remember to fully regenerate the Contents files before running import-contents"
+ print ""
+ print "Pausing for five seconds ..."
+ time.sleep (5)
+
+ except psycopg2.ProgrammingError, msg:
+ self.db.rollback()
+ print "FATAL: Unable to apply debversion table update 2!"
+ print "Error Message: " + str(msg)
+ print "Database changes have been rolled back."
###############################################################################
import errno, fcntl, os, sys, time, re
-import apt_pkg
+import apt_pkg, tarfile, commands
from daklib import database
from daklib import logging
from daklib import queue
else:
os.unlink(self.log_filename)
+
###############################################################################
+
def reject (str, prefix="Rejected: "):
global reject_message
if str:
suite_id = database.get_suite_id(suite)
projectB.query("INSERT INTO bin_associations (suite, bin) VALUES (%d, currval('binaries_id_seq'))" % (suite_id))
+ # insert contents into the database
+ contents = utils.generate_contents_information(file)
+ q = projectB.query("SELECT currval('binaries_id_seq')")
+ bin_id = int(q.getresult()[0][0])
+ for file in contents:
+ database.insert_content_path(bin_id, file)
+
# If the .orig.tar.gz is in a legacy directory we need to poolify
# it, so that apt-get source (and anything else that goes by the
# "Directory:" field in the Sources.gz file) works.
utils.copy(pkg.changes_file, Cnf["Dir::Root"] + dest)
for dest in copy_dot_dak.keys():
utils.copy(Upload.pkg.changes_file[:-8]+".dak", dest)
-
projectB.query("COMMIT WORK")
# Move the .changes into the 'done' directory
Cnf = None
projectB = None
-required_database_schema = 3
+required_database_schema = 4
################################################################################
queue_id_cache = {} #: cache for queues
uid_id_cache = {} #: cache for uids
suite_version_cache = {} #: cache for suite_versions (packages)
+suite_bin_version_cache = {}
+content_path_id_cache = {}
+content_file_id_cache = {}
+insert_contents_file_cache = {}
+cache_preloaded = False
################################################################################
@return: the version for I{source} in I{suite}
"""
+
global suite_version_cache
cache_key = "%s_%s" % (source, suite)
return version
+def get_latest_binary_version_id(binary, section, suite, arch):
+    """
+    Return the binaries.id for *binary* in *suite*/*section* for *arch*,
+    falling back to the 'all' architecture.  Consults (and fills)
+    suite_bin_version_cache before querying the database.
+
+    NOTE(review): the return type is inconsistent -- a cached hit returns an
+    int, a post-preload miss returns None (bare return), and an empty query
+    result returns False.  Callers should treat any falsy value as "not
+    found"; confirm before tightening.
+    """
+    global suite_bin_version_cache
+    # Cache keys for the requested arch and for arch 'all'; section/suite/arch
+    # are expected to be database ids here (they are interpolated as ints below).
+    cache_key = "%s_%s_%s_%s" % (binary, section, suite, arch)
+    cache_key_all = "%s_%s_%s_%s" % (binary, section, suite, get_architecture_id("all"))
+
+    # Check for the cache hit for its arch, then arch all
+    if suite_bin_version_cache.has_key(cache_key):
+        return suite_bin_version_cache[cache_key]
+    if suite_bin_version_cache.has_key(cache_key_all):
+        return suite_bin_version_cache[cache_key_all]
+    # After preload_binary_id_cache() has run, a cache miss is authoritative.
+    if cache_preloaded == True:
+        return # package does not exist
+
+    # NOTE(review): *binary* is spliced into the SQL as a raw string -- a
+    # package name containing a quote would break this query; the numeric
+    # arguments are forced through int() and are safe.
+    q = projectB.query("SELECT DISTINCT b.id FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.package = '%s' AND b.architecture = '%d' AND ba.suite = '%d' AND o.section = '%d'" % (binary, int(arch), int(suite), int(section)))
+
+    if not q.getresult():
+        return False
+
+    highest_bid = q.getresult()[0][0]
+
+    suite_bin_version_cache[cache_key] = highest_bid
+    return highest_bid
+
+def preload_binary_id_cache():
+ global suite_bin_version_cache, cache_preloaded
+
+ # Get suite info
+ q = projectB.query("SELECT id FROM suite")
+ suites = q.getresult()
+
+ # Get arch mappings
+ q = projectB.query("SELECT id FROM architecture")
+ arches = q.getresult()
+
+ for suite in suites:
+ for arch in arches:
+ q = projectB.query("SELECT DISTINCT b.id, b.package, o.section FROM binaries b JOIN bin_associations ba ON (b.id = ba.bin) JOIN override o ON (o.package=b.package) WHERE b.architecture = '%d' AND ba.suite = '%d'" % (int(arch[0]), int(suite[0])))
+
+ for bi in q.getresult():
+ cache_key = "%s_%s_%s_%s" % (bi[1], bi[2], suite[0], arch[0])
+ suite_bin_version_cache[cache_key] = int(bi[0])
+
+ cache_preloaded = True
+
################################################################################
def get_or_set_maintainer_id (maintainer):
q = projectB.query(sql)
return map(lambda x: x[0], q.getresult())
+
+################################################################################
+
+def get_or_set_contents_file_id(file):
+    """
+    Return the content_file_names id for *file*, inserting a new row if it
+    does not exist yet.  Results are memoized in content_file_id_cache.
+
+    NOTE(review): *file* comes from package contents (untrusted input) and
+    is spliced straight into the SQL -- a filename containing a single quote
+    will break or subvert these queries; it should be escaped/parameterized.
+    """
+    global content_file_id_cache
+
+    if not content_file_id_cache.has_key(file):
+        sql_select = "SELECT id FROM content_file_names WHERE file = '%s'" % file
+        q = projectB.query(sql_select)
+        if not q.getresult():
+            # since this can be called within a transaction, we can't use currval
+            q = projectB.query("INSERT INTO content_file_names VALUES (DEFAULT, '%s') RETURNING id" % (file))
+        content_file_id_cache[file] = int(q.getresult()[0][0])
+    return content_file_id_cache[file]
+
+################################################################################
+
+def get_or_set_contents_path_id(path):
+    """
+    Return the content_file_paths id for *path*, inserting a new row if it
+    does not exist yet.  Results are memoized in content_path_id_cache.
+
+    NOTE(review): *path* comes from package contents (untrusted input) and
+    is spliced straight into the SQL -- a path containing a single quote
+    will break or subvert these queries; it should be escaped/parameterized.
+    """
+    global content_path_id_cache
+
+    if not content_path_id_cache.has_key(path):
+        sql_select = "SELECT id FROM content_file_paths WHERE path = '%s'" % path
+        q = projectB.query(sql_select)
+        if not q.getresult():
+            # since this can be called within a transaction, we can't use currval
+            q = projectB.query("INSERT INTO content_file_paths VALUES (DEFAULT, '%s') RETURNING id" % (path))
+        content_path_id_cache[path] = int(q.getresult()[0][0])
+    return content_path_id_cache[path]
+
+################################################################################
+
+def insert_content_path(bin_id, fullpath):
+ global insert_contents_file_cache
+ cache_key = "%s_%s" % (bin_id, fullpath)
+
+ # have we seen this contents before?
+ # probably only revelant during package import
+ if insert_contents_file_cache.has_key(cache_key):
+ return
+
+ # split the path into basename, and pathname
+ (path, file) = os.path.split(fullpath)
+
+ # Get the necessary IDs ...
+ file_id = get_or_set_contents_file_id(file)
+ path_id = get_or_set_contents_path_id(path)
+
+ # Determine if we're inserting a duplicate row
+ q = projectB.query("SELECT 1 FROM content_associations WHERE binary_pkg = '%d' AND filepath = '%d' AND filename = '%d'" % (int(bin_id), path_id, file_id))
+ if q.getresult():
+ # Yes we are, return without doing the insert
+ return
+
+ # Put them into content_assiocations
+ projectB.query("INSERT INTO content_associations VALUES (DEFAULT, '%d', '%d', '%d')" % (bin_id, path_id, file_id))
+ return
--- /dev/null
+#!/usr/bin/python
+
+"""
+Class providing access to a projectb database
+
+This class provides convenience functions for common queries to a
+projectb database using psycopg2.
+
+Copyright (C) 2009 Mike O'Connor <stew@vireo.org>
+"""
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+################################################################################
+
+import psycopg2
+
+################################################################################
+
+class Projectb(object):
+    """
+    Object providing methods for accessing the projectb database
+
+    Each lookup method memoizes name -> id results in a per-instance dict,
+    caching None for names that do not exist so misses are not re-queried.
+    """
+    def __init__(self,Cnf):
+        # Build a psycopg2 DSN from the dak configuration; host and port are
+        # only added when explicitly configured ('' / '-1' mean "default").
+        connect_str = "dbname=%s"% (Cnf["DB::Name"])
+        if Cnf["DB::Host"] != '': connect_str += " host=%s" % (Cnf["DB::Host"])
+        if Cnf["DB::Port"] != '-1': connect_str += " port=%d" % (int(Cnf["DB::Port"]))
+
+        self.dbh = psycopg2.connect(connect_str)
+        # Lazily-filled name -> id caches for the lookup methods below.
+        self.suite_id_cache = {}
+        self.architecture_id_cache = {}
+        self.section_id_cache = {}
+
+    def get_suite_id(self, suite_name):
+        """
+        return the id for the given suite_name
+
+        @param suite_name: name of a suite such as "unstable" or "testing"
+
+        @rtype: int
+        @return: id of given suite or None if suite_name not matched
+
+        >>> Cnf = {'DB::Name' : "projectb","DB::Host":"","DB::Port":'-1' }
+        >>> pb = Projectb( Cnf )
+        >>> pb.get_suite_id("unstable")
+        5
+        >>> pb.get_suite_id("n'existe pas")
+        """
+        if not self.suite_id_cache.has_key(suite_name):
+            c = self.dbh.cursor()
+            c.execute("SELECT id FROM suite WHERE suite_name=%(suite_name)s",
+                      {'suite_name':suite_name})
+            r = c.fetchone()
+            if r:
+                self.suite_id_cache[suite_name] = r[0]
+            else:
+                # Cache the miss too, so unknown names hit the DB only once.
+                self.suite_id_cache[suite_name] = None
+
+        return self.suite_id_cache[suite_name]
+
+    def get_architecture_id(self, architecture_name):
+        """
+        return the id for the given architecture_name
+
+        @param architecture_name: name of an architecture such as "i386" or "source"
+
+        @rtype: int
+        @return: id of given architecture or None if architecture_name not matched
+
+        >>> Cnf = {'DB::Name' : "projectb","DB::Host":"","DB::Port":'-1' }
+        >>> pb = Projectb( Cnf )
+        >>> pb.get_architecture_id("i386")
+        7
+        >>> pb.get_architecture_id("n'existe pas")
+        """
+        if not self.architecture_id_cache.has_key(architecture_name):
+            c = self.dbh.cursor()
+            c.execute("SELECT id FROM architecture WHERE arch_string=%(architecture_name)s",
+                      {'architecture_name':architecture_name})
+            r = c.fetchone()
+            if r:
+                self.architecture_id_cache[architecture_name] = r[0]
+            else:
+                # Cache the miss too, so unknown names hit the DB only once.
+                self.architecture_id_cache[architecture_name] = None
+
+        return self.architecture_id_cache[architecture_name]
+
+    def get_section_id(self, section_name):
+        """
+        return the id for the given section_name
+
+        @param section_name: name of a section such as "x11" or "non-free/libs"
+
+        @rtype: int
+        @return: id of given section or None if section_name not matched
+
+        >>> Cnf = {'DB::Name' : "projectb","DB::Host":"","DB::Port":'-1' }
+        >>> pb = Projectb( Cnf )
+        >>> pb.get_section_id("non-free/libs")
+        285
+        >>> pb.get_section_id("n'existe pas")
+        """
+        if not self.section_id_cache.has_key(section_name):
+            c = self.dbh.cursor()
+            c.execute("SELECT id FROM section WHERE section=%(section_name)s",
+                      {'section_name':section_name})
+            r = c.fetchone()
+            if r:
+                self.section_id_cache[section_name] = r[0]
+            else:
+                # Cache the miss too, so unknown names hit the DB only once.
+                self.section_id_cache[section_name] = None
+
+        return self.section_id_cache[section_name]
+
+if __name__ == "__main__":
+    # Running this module directly executes the doctests above
+    # (they require a reachable projectb database).
+    import doctest
+    doctest.testmod()
apt_pkg.ReadConfigFileISC(Cnf,which_conf_file())
################################################################################
+
+def generate_contents_information(filename):
+ """
+ Generate a list of flies contained in a .deb
+
+ @type filename: string
+ @param filename: the path to a .deb
+
+ @rtype: list
+ @return: a list of files in the data.tar.* portion of the .deb
+ """
+ cmd = "ar t %s" % (filename)
+ (result, output) = commands.getstatusoutput(cmd)
+ if result != 0:
+ reject("%s: 'ar t' invocation failed." % (filename))
+ reject(utils.prefix_multi_line_string(output, " [ar output:] "), "")
+
+ # Ugh ... this is ugly ... Code ripped from process_unchecked.py
+ chunks = output.split('\n')
+
+ contents = []
+ try:
+ cmd = "ar x %s %s" % (filename, chunks[2])
+ (result, output) = commands.getstatusoutput(cmd)
+ if result != 0:
+ reject("%s: '%s' invocation failed." % (filename, cmd))
+ reject(utils.prefix_multi_line_string(output, " [ar output:] "), "")
+
+ # Got deb tarballs, now lets go through and determine what bits
+ # and pieces the deb had ...
+ if chunks[2] == "data.tar.gz":
+ data = tarfile.open("data.tar.gz", "r:gz")
+ elif data_tar == "data.tar.bz2":
+ data = tarfile.open("data.tar.bz2", "r:bz2")
+ else:
+ os.remove(chunks[2])
+ reject("couldn't find data.tar.*")
+
+ for tarinfo in data:
+ if not tarinfo.isdir():
+ contents.append(tarinfo.name[2:])
+
+ finally:
+ if os.path.exists( chunks[2] ):
+ os.remove( chunks[2] )
+
+ return contents
+
+###############################################################################
<helix> elmo: I can't believe people pay you to fix computers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+* Ganneff ponders how to best write the text to -devel. (need to tell em in
+ case they find more bugs). "We fixed the fucking idiotic broken implementation
+ to be less so" is probably not the nicest, even if perfect valid, way to say so
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%