'''
Helper code for contents generation.

@contact: Debian FTPMaster <ftpmaster@debian.org>
@copyright: 2011 Torsten Werner <twerner@debian.org>
@license: GNU General Public License version 2 or later
'''
10 ################################################################################
12 # This program is free software; you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation; either version 2 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful,
18 # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 # GNU General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program; if not, write to the Free Software
24 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 ################################################################################
import os.path

from multiprocessing import Pool
from subprocess import Popen, PIPE, call

from sqlalchemy import desc, or_
from sqlalchemy.exc import IntegrityError

from daklib.config import Config
from daklib.dbconn import *
from daklib.threadpool import ThreadPool
class ContentsWriter(object):
    '''
    ContentsWriter writes the Contents-$arch.gz files.
    '''
    def __init__(self, suite, architecture, overridetype, component = None):
        '''
        The constructor clones its arguments into a new session object to make
        sure that the new ContentsWriter object can be executed in a different
        thread.

        suite, architecture, overridetype are the corresponding ORM objects;
        component may be None to generate a suite-wide Contents file.
        '''
        self.suite = suite
        self.architecture = architecture
        self.overridetype = overridetype
        self.component = component
        # reuse the session the suite object is bound to
        self.session = suite.session()

    def query(self):
        '''
        Returns a query object that is doing most of the work.

        The query yields (file, package) pairs ordered by file; the Python
        side (fetch) only has to merge consecutive rows for the same file.
        '''
        overridesuite = self.suite
        if self.suite.overridesuite is not None:
            overridesuite = get_suite(self.suite.overridesuite, self.session)
        params = {
            'suite':         self.suite.suite_id,
            'overridesuite': overridesuite.suite_id,
            'arch_all':      get_architecture('all', self.session).arch_id,
            'arch':          self.architecture.arch_id,
            'type_id':       self.overridetype.overridetype_id,
            'type':          self.overridetype.overridetype,
        }
        # Collect the newest version of each binary package of the given
        # type for this suite/architecture (arch-specific or 'all') into a
        # temporary table. Shared by both query variants below.
        sql_create_temp = '''
create temp table newest_binaries (
    id integer primary key,
    package text);

create index newest_binaries_by_package on newest_binaries (package);

insert into newest_binaries (id, package)
    select distinct on (package) id, package from binaries
        where type = :type and
            (architecture = :arch_all or architecture = :arch) and
            id in (select bin from bin_associations where suite = :suite)
        order by package, version desc;'''

        if self.component is not None:
            # restrict overrides to the requested component
            params['component'] = self.component.component_id
            sql = sql_create_temp + '''

with

unique_override as
    (select o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
        o.component = :component)

select bc.file, o.section || '/' || b.package as package
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    order by bc.file, b.package'''
        else:
            # no component given: pick the most recently modified override
            # per (package, section) across all components
            sql = sql_create_temp + '''

with

unique_override as
    (select distinct on (o.package, s.section) o.package, s.section
        from override o, section s
        where o.suite = :overridesuite and o.type = :type_id and o.section = s.id
        order by o.package, s.section, o.modified desc)

select bc.file, o.section || '/' || b.package as package
    from newest_binaries b, bin_contents bc, unique_override o
    where b.id = bc.binary_id and o.package = b.package
    order by bc.file, b.package'''

        return self.session.query("file", "package").from_statement(sql). \
            params(params)

    def formatline(self, filename, package_list):
        '''
        Returns a formatted string for the filename argument.

        package_list is an iterable of 'section/package' strings which is
        joined with commas into the second column.
        '''
        package_list = ','.join(package_list)
        return "%-55s %s\n" % (filename, package_list)

    def fetch(self):
        '''
        Yields a new line of the Contents-$arch.gz file in filename order.

        Merges consecutive (file, package) rows from query() into one line
        per file.
        '''
        last_filename = None
        package_list = []
        for filename, package in self.query().yield_per(100):
            if filename != last_filename:
                if last_filename is not None:
                    yield self.formatline(last_filename, package_list)
                last_filename = filename
                package_list = []
            package_list.append(package)
        if last_filename is not None:
            yield self.formatline(last_filename, package_list)
        # end transaction to return connection to pool
        self.session.rollback()

    def get_list(self):
        '''
        Returns a list of lines for the Contents-$arch.gz file.
        '''
        return list(self.fetch())

    def output_filename(self):
        '''
        Returns the name of the output file.
        '''
        values = {
            'root':         Config()['Dir::Root'],
            'suite':        self.suite.suite_name,
            'architecture': self.architecture.arch_string,
        }
        if self.component is None:
            return "%(root)s/dists/%(suite)s/Contents-%(architecture)s.gz" % values
        values['component'] = self.component.component_name
        return "%(root)s/dists/%(suite)s/%(component)s/Contents-%(architecture)s.gz" % values

    def get_header(self):
        '''
        Returns the header for the Contents files as a string.
        '''
        filename = os.path.join(Config()['Dir::Templates'], 'contents')
        # 'with' guarantees the template file is closed even on read errors
        with open(filename) as header_file:
            return header_file.read()

    def write_file(self):
        '''
        Write the output file, compressed with 'gzip --rsyncable'.
        '''
        command = ['gzip', '--rsyncable']
        output_file = open(self.output_filename(), 'w')
        gzip = Popen(command, stdin = PIPE, stdout = output_file)
        gzip.stdin.write(self.get_header())
        for item in self.fetch():
            gzip.stdin.write(item)
        # close stdin so gzip sees EOF, then wait for it to flush and exit
        gzip.stdin.close()
        gzip.wait()
        output_file.close()

    @classmethod
    def write_all(class_, suite_names = [], force = False):
        '''
        Writes all Contents files for suites in list suite_names which defaults
        to all 'touchable' suites if not specified explicitly. Untouchable
        suites will be included if the force argument is set to True.

        Note: suite_names is never mutated, so the mutable default is safe
        here.
        '''
        session = DBConn().session()
        suite_query = session.query(Suite)
        if len(suite_names) > 0:
            suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
        if not force:
            suite_query = suite_query.filter_by(untouchable = False)
        # fan the per-suite/arch work out to 'dak contents generate_helper'
        # child processes
        pool = Pool()
        for suite in suite_query:
            for architecture in suite.get_architectures(skipsrc = True, skipall = True):
                # 'deb' packages, plus 'udeb' for 'main' and 'non-free'
                for extra_args in (['deb'], ['udeb', 'main'], ['udeb', 'non-free']):
                    command = ['dak', 'contents', '-s', suite.suite_name,
                        'generate_helper', architecture.arch_string] + extra_args
                    pool.apply_async(call, (command, ))
        pool.close()
        # wait until all child processes are done
        pool.join()
        session.close()
class ContentsScanner(object):
    '''
    ContentsScanner provides a threadsafe method scan() to scan the contents of
    a single binary package.
    '''
    def __init__(self, binary):
        '''
        The argument binary is the actual DBBinary object that should be
        scanned. Only its id is kept so that scan() can re-fetch the object
        in its own session (and thus run safely in another thread).
        '''
        self.binary_id = binary.binary_id

    def scan(self, dummy_arg = None):
        '''
        This method does the actual scan and fills in the associated BinContents
        property. It commits any changes to the database. The argument dummy_arg
        is ignored but needed by our threadpool implementation.
        '''
        session = DBConn().session()
        binary = session.query(DBBinary).get(self.binary_id)
        for filename in binary.scan_contents():
            binary.contents.append(BinContents(file = filename))
        try:
            session.commit()
        except IntegrityError:
            # duplicate filenames in the package broke the unique constraint;
            # record a marker row instead of the real contents
            session.rollback()
            binary.contents.append(BinContents(file = 'DUPLICATE_FILENAMES'))
            session.commit()
        session.close()

    @classmethod
    def scan_all(class_, limit = None):
        '''
        The class method scan_all() scans all binaries using multiple threads.
        The number of binaries to be scanned can be limited with the limit
        argument. Returns the number of processed and remaining packages as a
        dict.
        '''
        session = DBConn().session()
        # '== None' is required by SQLAlchemy here; do not change to 'is None'
        query = session.query(DBBinary).filter(DBBinary.contents == None)
        # bind the *unlimited* count now, call it after the workers ran so it
        # reflects what is still left to do
        remaining = query.count
        if limit is not None:
            query = query.limit(limit)
        processed = query.count()
        threadpool = ThreadPool()
        for binary in query.yield_per(100):
            threadpool.queueTask(ContentsScanner(binary).scan)
        # wait for all worker threads before recounting
        threadpool.joinAll()
        remaining = remaining()
        session.close()
        return { 'processed': processed, 'remaining': remaining }