git.donarmstrong.com Git - neurodebian.git/commitdiff
ENH: redid handling of long descriptions -- imho better now but more TODO
author: Yaroslav Halchenko <debian@onerussian.com>
Tue, 5 Apr 2011 04:04:47 +0000 (00:04 -0400)
committer: Yaroslav Halchenko <debian@onerussian.com>
Tue, 5 Apr 2011 04:04:47 +0000 (00:04 -0400)
nested itemized lists and descriptions in general are still not
handled properly most of the time.  But at least now it looks more
coherent and should not swallow the spaces

neurodebian/dde.py

index 23fb669065f4539165b59ee2f732b2e3f7e0980f..1dad430083f36d9b5aeab71a48a2bc7aa6f64668 100755 (executable)
@@ -574,33 +574,112 @@ def assure_unicode(s):
 
 
 def convert_longdescr(ld):
+    """
+
+    yoh: I think all this long description conversion will keep giving
+    us problems since per se there is no strict regulations,
+    especially in blends files
+    """
     descr = u''
     ld = ld.replace('% ', '%% ')
+    ld = ld.replace(r'\t', '    ') # just in case assuming tab 4
     ld = ld.split('\n')
-    isindented = False
-    for i, l in enumerate(ld):
-        if l == ' .':
-            isindented = False
-            ld[i] = ' #NEWLINEMARKER# '
-        # look for embedded lists
-        elif len(l) >=3 and l[:2] == '  ':
-            if l[2] in '-*':
-                isindented = False
-                ld[i] = ' #NEWLINEMARKER# ' + l[2:]
-            elif not isindented:
-                ld[i] = ' \n::\n\n' + l
-                isindented = True
+    re_leadblanks = re.compile("^ *")
+    re_itemized = re.compile("^[o*-+] +")
+    re_itemized_gr = re.compile("^( *)([-o*+] +)?(.*?)$")
+    re_description_gr = re.compile("^( *[^-]+ - )(.*?)$")
+
+    def unwrap_lines(lines):
+        out = []
+        indent_levels = [-1]
+        for l in lines:
+            match = re_itemized_gr.search(l).groups()
+            if ((len(match[0]) in indent_levels and match[1] is None)
+                or (len(match[0]) > max(indent_levels)+4)) \
+                and match[2].strip() != '.':
+                # append to previous
+                if not out[-1].endswith(" "):
+                    out[-1] += " "
+                out[-1] += match[2]
+            else:
+                out.append(l)
+
+            indent_levels = [len(match[0])]
+            if match[1] is not None:
+                indent_levels += [len(match[0]) + len(match[1])]
+            if match[2].strip() == '.':
+                # reset though if '.'
+                indent_levels = [-1]
+        return out
+
+    def dedent_withlevel(lines):
+        """Dedent `lines` given in a list provide dedented lines and how much was dedented
+        """
+        nleading = min([re_leadblanks.search(l).span()[1]
+                        for l in lines])
+        return [l[nleading:] for l in lines], nleading
+
+    def block_lines(ld, level=0):
+        # so we got list of lines
+        # dedent all of them first
+        ld, level = dedent_withlevel(ld)
+
+        # lets collect them in blocks/paragraphs
+        # 1. into paragraphs split by '.'
+        blocks, block = [], None
+
+        # next block can begin if
+        #  1.  . line
+        #  2. it was an itemized list and all items begin with
+        #     the same symbol or get further indented accordingly
+        #     so let's first check if it is an itemized list
+        itemized_match = re_itemized.search(ld[0])
+        if itemized_match:
+            allow_indents = " "*itemized_match.span()[1]
+        else:
+            allow_indents = None
+        for l in ld:
+            if block is None or l.strip() == '.' \
+                   or (len(l) and ( len(block) and (
+                (l.startswith(' ') and not block[-1].startswith(' '))
+                or
+                (not l.startswith(' ') and block[-1].startswith(' '))))):
+                block = []
+                blocks.append(block)
+            if l.strip() != '.':
+                block.append(l)
+        if len(blocks) == 1:
+            return blocks[0]
+        else:
+            return [block_lines(b, level+1) for b in blocks]
+
+    def blocks_to_rst(bls, level=0):
+        # check if this block is an itemized beast
+        #itemized_match = re_itemized_gr.search(bls[0][0])
+        #if itemized_match:
+        #    res += ' 'allow_indents = " "*itemized_match.span()[1]
+        out = ''
+        for b in bls:
+            if isinstance(b, list):
+                if len(b) == 1:
+                    out += " "*level + b[0] + '\n\n'
+                else:
+                    out += blocks_to_rst(b, level+1)
             else:
-                # leave as is
-                ld[i] = ' %s\n' % l
-        descr += ld[i][1:]
+                e = " "*level + b + '\n'
+                if not re_itemized.search(b):
+                    pass
+                    #e += '\n'
+                elif len(e) and e[0] == ' ':
+                    # strip 1 leading blank
+                    e = e[1:]
+                out += e
+        out += '\n'
+        return out
 
-    descr = descr.replace('#NEWLINEMARKER# ', '\n\n')
-    # cleanup any leftover (e.g. trailing markers)
-    descr = descr.replace('#NEWLINEMARKER#', '')
-    # safe-guard ReST active symbols
-    descr = re.sub(r'([\'`*])', r'\\\1', descr)
-    return descr
+    ld = unwrap_lines(ld)
+    bls = block_lines(ld)
+    return blocks_to_rst(bls)
 
 
 def underline_text(text, symbol):
@@ -890,6 +969,21 @@ def main():
     cfg = SafeConfigParser()
     cfg.read(opts.cfg)
 
+    if cmd == 'debug_ld':
+        # load the db from file
+        db = read_db(opts.db)
+
+        for p in db.keys():
+        #for p in ['dtitk', 'psychopy', 'psytoolkit', 'ginkgo-cadx', 'gridengine-master', 'cctools']:
+            if not 'long_description' in db[p]['main']:
+                continue
+            ld = db[p]['main']['long_description']
+
+            print ">>>>>>>>> ", p
+            #print ld
+            print "----"
+            print convert_longdescr(ld)
+        raise SystemExit
     # load existing db, unless renew is requested
     if cmd == 'updatedb':
         db = {}