4 # translate an entire Wiki site into local html.
# The build system substitutes the real release number for the
# placeholder below.  The comparison spells the placeholder as
# '@' + 'TOPLEVEL_VERSION' + '@' so that the comparison string itself
# is never substituted; an unsubstituted (run-from-source-tree) copy
# then falls back to the hard-coded version.
12 program_version = '@TOPLEVEL_VERSION@'
13 if program_version == '@' + 'TOPLEVEL_VERSION' + '@':
14 program_version = '1.5.69'
18 print """Usage: wiki-slurp.py [OPTION]... ENTRY-PATTERN...
20 Download a WikiWiki site and convert to local html.
22 Example: wiki-slurp.py -d /tmp/output 'http://c2.com/cgi-bin/wiki?'
26 -m,--mangle mangle file names to be shorter
27 -d,--outdir=DIR set output directory to DIR
28 -v,--version version information
30 Warning: downloading an entire Wiki site generates a huge amount of
31 traffic and server load. Consider asking for a copy of the database.
32 See also http://c2.com/cgi-bin/wiki?WikiSlurp
34 Report bugs to bug-lilypond@gnu.org.
36 Written by Han-Wen Nienhuys <hanwen@cs.uu.nl>
40 print r"""wiki-slurp.py %s
42 This is free software. It is covered by the GNU General Public License,
43 and you are welcome to change it and/or distribute copies of it under
44 certain conditions. Invoke as `midi2ly --warranty' for more information.
46 Copyright (c) 2000--2003 by Han-Wen Nienhuys <hanwen@cs.uu.nl>
# Command-line parsing: -h/--help, -v/--version, -m/--mangle and
# -d DIR/--outdir=DIR (handled by the option dispatch further down);
# remaining arguments are the Wiki URL patterns / entry pages.
49 (options, files) = getopt.getopt (sys.argv[1:], 'vd:hm', ['help','mangle','version', 'outdir='])
56 return '%d' % hash (name)
65 if o== '--help' or o == '-h':
68 elif o == '--version' or o == '-v':
71 elif o == '--mangle' or o == '-m':
73 elif o == '--outdir' or o == '-d':
84 sys.stderr.write ("\n")
88 for pattern in patterns:
# Backslash-escape '?' and '.' (the two regexp metacharacters that
# occur in Wiki CGI URLs) so each literal URL prefix can be embedded
# in the link-matching regexps that unwiki() builds below.
89 re_patterns.append (re.sub ('([?.])', '\\\\\\1', pattern))
# Rewrite the in-site Wiki links of one HTML page so the result can be
# browsed locally.
#   str     -- the page's HTML text (shadows the builtin name `str`)
#   pat     -- regexp-escaped URL prefix of the Wiki CGI (built above
#              from '([?.])' escaping)
#   mangler -- maps a Wiki page name to a local file basename
# Returns (str, newurls): the rewritten HTML plus the page names that
# were linked from this page.
98 def unwiki (str, pat, mangler):
# Anchors of the form <pat><WikiName> whose text is also a WikiName
# are turned into local "<mangled>.html" links by do_replace below.
99 local = '<a href="%s([A-Za-z]+)">([A-Za-z]+)</a>' % pat
# NOTE(review): `newurls` is created -- and presumably appended to by
# do_replace -- on lines not shown in this excerpt; the `us = newurls`
# default argument binds the list into the nested function.  Confirm
# against the full file.
102 def do_replace (match, us = newurls, mangler = mangler):
103 newurl = match.group (1)
104 local = mangler (newurl)
106 replacement = '<a href="%s.html">%s</a>' % (local,newurl)
110 str = re.sub (local, do_replace, str)
# Any other in-site link (the pattern also admits '?' in the link
# text) is reduced to its bare text...
111 otherurl = '<a href="%s[^>]*">([?A-Za-z]+)</a>' % pat
112 str = re.sub (otherurl, '\\1', str)
# ...and an in-site link wrapping an image keeps just the <img> tag.
114 imagesrc = '<a href="%s[^>]*">(<img[^>]*>)</a>' % pat
115 str = re.sub (imagesrc, '\\1', str)
117 return (str, newurls)
# Per-page fetch (body of the main download loop; the loop header and
# the visited/to-do bookkeeping fall outside this excerpt).
128 mangled = mangler (f)
130 sys.stderr.write ("reading `%s' ... " % f)
# Python-2 urllib; patterns[0] is the canonical site URL prefix, f the
# Wiki page name.
133 page = urllib.urlopen (patterns[0] + f).read ()
134 sys.stderr.write ('done. ')
# Localize links for every known URL spelling of the site.
137 for re_pattern in re_patterns:
138 (page, nus) = unwiki (page, re_pattern, mangler)
# NOTE(review): `nus` (pages newly discovered by unwiki) is presumably
# merged into the to-do set on lines not shown here -- confirm.
141 outname = os.path.join (outdir, mangled) + '.html'
142 fo = open (outname, 'w')
144 sys.stderr.write ("Writing `%s'\n" % outname)
151 page = open ('/tmp/FrontPage.html').read()
152 (str, us)=unwiki (page, re_patterns)