# wiki-slurp.py -- mirror a WikiWiki site as static local HTML pages.
# NOTE(review): this chunk is an excerpt; each line carries its original
# line number as a leading artifact and intervening lines are missing.
4 # translate an entire Wiki site into local html.
# '@TOPLEVEL_VERSION@' is a build-time (autoconf-style) substitution marker.
# If the build did not substitute it, fall back to a hard-coded version.
12 program_version = '@TOPLEVEL_VERSION@'
13 if program_version == '@' + 'TOPLEVEL_VERSION' + '@':
14 program_version = '1.5.69'
# Print the --help/usage text.  Fragment: the enclosing `def' (if any) and
# the closing of this Python-2 triple-quoted `print' statement fall outside
# this excerpt, so no comments are inserted below -- the lines are the
# interior of one string literal.
18 print """Usage: wiki-slurp.py [OPTIONS]... ENTRY-PATTERN...
20 Download a WikiWiki site and convert to local html.
22 Example: wiki-slurp.py -d /tmp/output 'http://c2.com/cgi-bin/wiki?'
25 -h, --help show this help
26 -m, --mangle mangle file names to be shorter
27 -d, --outdir=DIR set output directory to DIR
28 -v, --version show version information
30 Warning: downloading an entire Wiki site generates a huge amount of
31 traffic and server load. Consider asking for a copy of the database.
32 See also http://c2.com/cgi-bin/wiki?WikiSlurp.
34 Report bugs to bug-lilypond@gnu.org.
36 Written by Han-Wen Nienhuys <hanwen@cs.uu.nl>.
# Print the version/copyright banner (Python 2 `print' of a raw string;
# the enclosing `def' and the string's closing/`% program_version' are
# outside this excerpt).
# NOTE(review): the text says "Invoke as `midi2ly --warranty'" -- an apparent
# copy-paste from midi2ly; it should presumably name wiki-slurp.py.  Left
# unchanged here because the literal is runtime output.
41 print r"""wiki-slurp.py %s
43 This is free software. It is covered by the GNU General Public License,
44 and you are welcome to change it and/or distribute copies of it under
45 certain conditions. Invoke as `midi2ly --warranty' for more information.
47 Copyright (c) 2000--2004 by Han-Wen Nienhuys <hanwen@cs.uu.nl>
# Parse the command line: short options -v -d DIR -h -m and their long
# equivalents --version, --outdir=DIR, --help, --mangle.  `files' receives
# the positional ENTRY-PATTERN arguments.
50 (options, files) = getopt.getopt (sys.argv[1:], 'vd:hm', ['help','mangle','version', 'outdir='])
# Body line of a file-name mangler (its `def' is outside this excerpt):
# shortens a Wiki page name to the decimal form of its hash, used when
# -m/--mangle is given.  NOTE(review): Python 2 hash() is platform- and
# run-dependent for some types, so mangled names need not be stable
# across interpreters -- confirm against the caller's expectations.
57 return '%d' % hash (name)
# Command-line option dispatch (fragment: the enclosing `for o, a in
# options:' loop header and each branch's body line are outside this
# excerpt).
66 if o== '--help' or o == '-h':
69 elif o == '--version' or o == '-v':
# -m/--mangle: use hashed (shortened) output file names.
72 elif o == '--mangle' or o == '-m':
# -d/--outdir: directory that receives the generated .html files.
74 elif o == '--outdir' or o == '-d':
85 sys.stderr.write ("\n")
# Backslash-escape the regex metacharacters `?' and `.' in each entry URL
# pattern so it can be embedded verbatim inside the link-matching regexps
# built later (see unwiki).  (`re_patterns' is initialised outside this
# excerpt.)
89 for pattern in patterns:
90 re_patterns.append (re.sub ('([?.])', '\\\\\\1', pattern))
def unwiki (str, pat, mangler):
    """Rewrite Wiki links in the html page `str' for local browsing.

    `pat' is the regex-escaped URL prefix of the Wiki's CGI script.
    Links of the form <a href="PATPageName">PageName</a> become local
    links <a href="MANGLED.html">PageName</a>, where MANGLED is
    mangler (PageName); each such page name is collected so the caller
    can queue it for download.  Remaining links into the Wiki whose
    anchor text is plain words or `?' (edit/search links) are stripped
    to their anchor text, and Wiki links wrapping an <img> tag are
    unwrapped to the bare image.

    Returns the tuple (rewritten-html, list-of-linked-page-names).

    NOTE(review): reconstructed from a garbled excerpt -- the `newurls'
    initialisation and the append/return inside do_replace were missing;
    behaviour inferred from the surviving lines and the final return.
    """
    # Pages linked from this page, in order of first appearance.
    newurls = []

    # A proper Wiki page link: the CGI prefix followed by a WikiWord.
    local = '<a href="%s([A-Za-z]+)">([A-Za-z]+)</a>' % pat

    def do_replace (match, us = newurls, mangler = mangler):
        # Record the page name for the caller and point the link at the
        # local (possibly mangled) file name instead of the live Wiki.
        newurl = match.group (1)
        local = mangler (newurl)
        us.append (newurl)
        replacement = '<a href="%s.html">%s</a>' % (local, newurl)
        return replacement

    str = re.sub (local, do_replace, str)

    # Any other link back into the Wiki (e.g. the trailing `?' edit
    # links): keep only the anchor text.
    otherurl = '<a href="%s[^>]*">([?A-Za-z]+)</a>' % pat
    str = re.sub (otherurl, '\\1', str)

    # Wiki links wrapping an image: keep only the <img> tag itself.
    imagesrc = '<a href="%s[^>]*">(<img[^>]*>)</a>' % pat
    str = re.sub (imagesrc, '\\1', str)

    return (str, newurls)
# Per-page download loop (fragment: the loop header naming `f', the queueing
# of newly discovered pages, and the file write/close lines are outside
# this excerpt).
129 mangled = mangler (f)
131 sys.stderr.write ("reading `%s' ... " % f)
# Python 2 urllib: fetch the raw Wiki page for entry `f', using the first
# entry pattern as the URL prefix.
134 page = urllib.urlopen (patterns[0] + f).read ()
135 sys.stderr.write ('done. ')
# Localise links for every configured entry pattern; `nus' holds the newly
# discovered page names (presumably merged into the download queue --
# that code is not visible here).
138 for re_pattern in re_patterns:
139 (page, nus) = unwiki (page, re_pattern, mangler)
# Write the converted page as <outdir>/<mangled>.html.
142 outname = os.path.join (outdir, mangled) + '.html'
143 fo = open (outname, 'w')
145 sys.stderr.write ("Writing `%s'\n" % outname)
# NOTE(review): stale debugging code -- reads a hard-coded /tmp page and
# calls unwiki with only two arguments, although unwiki takes
# (str, pat, mangler); executing this would raise a TypeError.  It also
# rebinds the builtin name `str'.  Presumably dead code left over from
# development; confirm before removing.
152 page = open ('/tmp/FrontPage.html').read()
153 (str, us)=unwiki (page, re_patterns)