if q.has_key('popcon'):
db[p]['main']['debian_popcon'] = q['popcon']
# if we have debian, need to get ubuntu
- q = dde_get(query_url + "/packages/prio-ubuntu-natty/%s" % p)
+ q = dde_get(query_url + "/packages/prio-ubuntu-oneiric/%s" % p)
if q and q.has_key('popcon'):
db[p]['main']['ubuntu_popcon'] = q['popcon']
else:
# convert_longdescr(ld): turn the long-description string `ld` into text
# assembled by blocks_to_rst() below.
#
# This span is a diff with its indentation stripped: lines marked `-` are the
# previous implementation (marker-based joining plus escaping of ' ` *), lines
# marked `+` are the replacement, unmarked lines are shared by both.
# The replacement works in three steps on the list of lines:
#   1. unwrap_lines()  - glue continuation lines onto the line they continue
#   2. block_lines()   - group lines into (possibly nested) lists of blocks
#   3. blocks_to_rst() - render the nested lists as one string
def convert_longdescr(ld):
+ """
+
+ yoh: I think all this long description conversion will keep giving
+ us problems since per se there is no strict regulations,
+ especially in blends files
+ """
# NOTE(review): `descr` is assigned but never read in the lines shown.
+ descr = u''
ld = ld.replace('% ', '%% ')
# NOTE(review): r'\t' is a raw string, i.e. a backslash followed by 't', not
# a tab character -- confirm this is really what the descriptions contain.
+ ld = ld.replace(r'\t', ' ') # just in case assuming tab 4
ld = ld.split('\n')
# --- previous implementation (removed by this change) ---
- for i, l in enumerate(ld):
- if l == ' .':
- ld[i] = ' #NEWLINEMARKER#'
- # look for embedded lists
- elif len(l) >=3 and l[:2] == ' ' and l[2] in '-*':
- ld[i] = ' #NEWLINEMARKER# ' + l[2:]
-
- ld = u' '.join([l[1:] for l in ld])
- ld = ld.replace('#NEWLINEMARKER# ', '\n\n')
- # cleanup any leftover (e.g. trailing markers)
- ld = ld.replace('#NEWLINEMARKER#', '')
- # safe-guard ReST active symbols
- ld = re.sub(r'([\'`*])', r'\\\1', ld)
- return ld
# --- replacement implementation ---
# leading blanks of a line (always matches, possibly with zero length)
+ re_leadblanks = re.compile("^ *")
# NOTE(review): inside the character class `*-+` is a range from '*' to '+',
# so this matches 'o', '*' and '+' but NOT '-'; re_itemized_gr below lists
# '-' first ("[-o*+]") and does match it.
+ re_itemized = re.compile("^[o*-+] +")
# groups: (leading blanks, optional bullet plus blanks, rest of the line)
+ re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$")
# NOTE(review): re_description_gr is not used in the lines shown.
+ re_description_gr = re.compile("^( *[^-]+ - )(.*?)$")
+
# unwrap_lines(lines): return a new list in which a line is appended (after a
# single separating blank) to the previous output line when it is not a '.'
# line and either
#   - it has no bullet and its count of leading blanks equals one of the
#     remembered indent levels, or
#   - it is indented by more than max(indent_levels)+4 blanks.
# Every other line starts a new output entry.  The remembered levels are the
# indent of the current line and, for a bullet line, that indent plus the
# bullet width; a '.' line resets them to [-1].
# NOTE(review): indent_levels starts as [-1], so a first line with more than
# 3 leading blanks takes the "append" branch while `out` is still empty and
# out[-1] raises IndexError.
+ def unwrap_lines(lines):
+ out = []
+ indent_levels = [-1]
+ for l in lines:
+ match = re_itemized_gr.search(l).groups()
+ if ((len(match[0]) in indent_levels and match[1] is None)
+ or (len(match[0]) > max(indent_levels)+4)) \
+ and match[2].strip() != '.':
+ # append to previous
+ if not out[-1].endswith(" "):
+ out[-1] += " "
+ out[-1] += match[2]
+ else:
+ out.append(l)
+
+ indent_levels = [len(match[0])]
+ if match[1] is not None:
+ indent_levels += [len(match[0]) + len(match[1])]
+ if match[2].strip() == '.':
+ # reset though if '.'
+ indent_levels = [-1]
+ return out
+
# NOTE(review): min() raises ValueError when `lines` is empty.
+ def dedent_withlevel(lines):
+ """Dedent `lines` given in a list provide dedented lines and how much was dedented
+ """
+ nleading = min([re_leadblanks.search(l).span()[1]
+ for l in lines])
+ return [l[nleading:] for l in lines], nleading
+
# block_lines(ld, level=0): dedent the lines, then split them into blocks.
# A new block starts at the first line, at every '.' line (which itself is
# dropped), and whenever a non-empty line switches between "starts with a
# blank" and "does not start with a blank" relative to the last line of the
# current non-empty block.  A single block is returned as a flat list of
# lines; otherwise every non-empty block is processed recursively.
# NOTE(review): the `level` argument is overwritten by the dedent amount on
# the first statement, and `allow_indents` is assigned but never read in the
# lines shown.  An empty `ld` is not handled (min() in dedent_withlevel and
# ld[0] would raise).
+ def block_lines(ld, level=0):
+ # so we got list of lines
+ # dedent all of them first
+ ld, level = dedent_withlevel(ld)
+
+ # lets collect them in blocks/paragraphs
+ # 1. into paragraphs split by '.'
+ blocks, block = [], None
+
+ # next block can begin if
+ # 1. . line
+ # 2. it was an itemized list and all items begin with
+ # the same symbol or get further indented accordingly
+ # so let's first check if it is an itemized list
+ itemized_match = re_itemized.search(ld[0])
+ if itemized_match:
+ allow_indents = " "*itemized_match.span()[1]
+ else:
+ allow_indents = None
+ for l in ld:
+ if block is None or l.strip() == '.' \
+ or (len(l) and ( len(block) and (
+ (l.startswith(' ') and not block[-1].startswith(' '))
+ or
+ (not l.startswith(' ') and block[-1].startswith(' '))))):
+ block = []
+ blocks.append(block)
+ if l.strip() != '.':
+ block.append(l)
+ if len(blocks) == 1:
+ return blocks[0]
+ else:
+ return [block_lines(b, level+1) for b in blocks if len(b)]
+
# blocks_to_rst(bls, level=0): render the structure built by block_lines()
# as one string.  A one-element list becomes its line prefixed with
# " "*level and followed by an empty line; a longer list is rendered
# recursively with level+1; a plain string is prefixed with " "*level and,
# if re_itemized matches it, loses one leading blank again.
+ def blocks_to_rst(bls, level=0):
+ # check if this block is an itemized beast
+ #itemized_match = re_itemized_gr.search(bls[0][0])
+ #if itemized_match:
+ # res += ' 'allow_indents = " "*itemized_match.span()[1]
+ out = ''
+ for b in bls:
+ if isinstance(b, list):
+ if len(b) == 1:
+ out += " "*level + b[0] + '\n\n'
+ else:
+ out += blocks_to_rst(b, level+1)
+ else:
+ e = " "*level + b + '\n'
+ if not re_itemized.search(b):
+ pass
+ #e += '\n'
+ elif len(e) and e[0] == ' ':
+ # strip 1 leading blank
+ e = e[1:]
+ out += e
+ out += '\n'
+ return out
+
# the actual conversion: unwrap -> group into blocks -> render
+ ld = unwrap_lines(ld)
+ bls = block_lines(ld)
+ return blocks_to_rst(bls)
def underline_text(text, symbol):
def sort_by_maintainer(db):
    """Group package names by the name of their maintainer.

    For every package in `db` the maintainer is taken from the first of the
    following entries that is present and not None:

    1. ``db[pkg]['main']['original_maintainer']``
    2. ``db[pkg]['main']['maintainer']``
    3. ``db[pkg]['blends']['responsible']``

    Everything from the first ``<`` on (the email part) is cut off and names
    that differ only in case are merged.  Packages without any usable
    maintainer entry, and maintainers whose name is not pure ASCII, are
    skipped.

    Returns a dict mapping the maintainer name (in the spelling that was
    encountered first) to the ``np.unique`` array (sorted, no duplicates)
    of the names of that maintainer's packages.
    """
    maints = {}     # lower-cased maintainer name -> list of package names
    maint_ids = {}  # lower-cased maintainer name -> first spelling seen
    for pkg, pkginfo in db.items():
        # a package may carry only blends info and no 'main' section at all
        info = pkginfo.get('main', {})
        candidates = (info.get('original_maintainer'),
                      info.get('maintainer'),
                      pkginfo.get('blends', {}).get('responsible'))
        maint = None
        for candidate in candidates:
            if candidate is not None:
                maint = candidate
                break
        if maint is None:
            # no sane maintainer info
            continue
        # safeguard: <> confuses sphinx and we don't care about different
        # emails; partition() leaves a name without email part untouched
        # (slicing with find() would cut its last character)
        maint = maint.partition('<')[0].strip()
        # kick out non-ascii ones; UnicodeError covers both the encode and
        # the decode failure, whatever string type the name happens to be
        try:
            maint.encode('ascii')
        except UnicodeError:
            continue
        key = maint.lower()
        maint_ids.setdefault(key, maint)
        maints.setdefault(key, []).append(pkg)
    # remove duplicates
    out = {}
    for key, pkgs in maints.items():
        out[maint_ids[key]] = np.unique(pkgs)
    return out
def sort_by_release(db):
ids.sort()
for id_ in ids:
label = ('pkgs-%s-%s' % (sectitle, id_)).lower().replace(' ', '_').replace('/', '_')
- if not len(pkgsdict[id_]):
+ # filter out crap
+ filtered_pkgs = [p for p in pkgsdict[id_] if p in db]
+ if not len(filtered_pkgs):
continue
- try:
- plist = toc_template.render(
- label=label,
- title=underline_text(title_tmpl % id_, '='),
- pkgs=pkgsdict[id_],
- db=db)
- if not plist:
- continue
- toc = codecs.open(os.path.join(outdir,
- 'pkglists',
- '%s.rst' % label),
- 'w', 'utf-8')
- toc.write(toc_template.render(
- label=label,
- title=underline_text(title_tmpl % id_, '='),
- pkgs=pkgsdict[id_],
- db=db))
- toc.close()
- except jinja2.exceptions.UndefinedError:
- # ignore crap
- pass
+ plist = toc_template.render(
+ label=label,
+ title=underline_text(title_tmpl % id_, '='),
+ pkgs=filtered_pkgs,
+ db=db)
+ toc = codecs.open(os.path.join(outdir,
+ 'pkglists',
+ '%s.rst' % label),
+ 'w', 'utf-8')
+ toc.write(plist)
+ toc.close()
hltoc.write('* :ref:`%s`\n' % label)
hltoc.write('\n\n')
cfg = SafeConfigParser()
cfg.read(opts.cfg)
+ if cmd == 'debug_ld':
+ # load the db from file
+ db = read_db(opts.db)
+
+ for p in db.keys():
+ #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']:
+ if not 'long_description' in db[p]['main']:
+ continue
+ ld = db[p]['main']['long_description']
+
+ print ">>>>>>>>> ", p
+ #print ld
+ print "----"
+ print convert_longdescr(ld)
+ raise SystemExit
# load existing db, unless renew is requested
if cmd == 'updatedb':
db = {}