Web build: refactor website_post and add exclusions.

author Graham Percival <graham@percival-music.ca>

Mon, 8 Mar 2010 20:39:50 +0000 (20:39 +0000)

committer Graham Percival <graham@percival-music.ca>

Mon, 8 Mar 2010 21:45:06 +0000 (21:45 +0000)
author Graham Percival <graham@percival-music.ca>
Mon, 8 Mar 2010 20:39:50 +0000 (20:39 +0000)
committer Graham Percival <graham@percival-music.ca>
Mon, 8 Mar 2010 21:45:06 +0000 (21:45 +0000)
diff --git a/scripts/build/website_post.py b/scripts/build/website_post.py

index 14905a85488130f213b44d5f7538b581fc8dd45f..9d64cb18b098df00fc2dcffe9180516aeb627bb7 100644 (file)
--- a/scripts/build/website_post.py
+++ b/scripts/build/website_post.py
@@ -1,133 +1,148 @@
  #!@PYTHON@
  #-*- coding: utf-8 -*-
  
-## This is web_post.py. This script deals with translations
-## in the "make website" target.
+##### This is web_post.py. This script deals with translations
+##### in the "make website" target.
  
  import sys
  import os
  import glob
+import re
  
-#### Translation data
+###### Translation data
  lang_lookup = {
-  'fr': 'français',
-  'es': 'español',
-  '': 'english'
+    'fr': 'français',
+    'es': 'español',
+    '': 'english'
  }
  
  lang_other_langs = {
-  'es': 'Otros idiomas: ',
-  'fr': 'Autres langues : ',
-  '': 'Other languages: '
+    'es': 'Otros idiomas: ',
+    'fr': 'Autres langues : ',
+    '': 'Other languages: '
  }
  
+exclude_pages = [
+    'music-glossary',
+    'snippets',
+    'internals',
+    'contributor'
+]
  
-#### Actual program
+###### Actual program
  
-#indir, outdir = sys.argv[1:]
-
-# FIXME: looks dangerous!
-indir = sys.argv[1]
-outdir=indir
-
-os.chdir(indir)
+dir = sys.argv[1]
  
+os.chdir(dir)
  html_files = glob.glob( '*.html' )
  
-# messy way to get all languages
+
+### messy way to get all languages
  langs_set = set()
  for file in html_files:
-       file_split = file.split('.')
-       if (len(file_split) == 2):
-               # it's English
-               lang = ''
-       else:
-               # it's a translation
-               lang = file_split[1]
-       # make sure it's a real language
-       if (not (lang == "en")):
-               langs_set.add(lang)
+    file_split = file.split('.')
+    if (len(file_split) == 2):
+        # it's English
+        lang = ''
+    elif (len(file_split) == 3):
+        # it's a translation
+        lang = file_split[1]
+        # make sure it's a translated language
+        if (not (lang == "en")):
+            langs_set.add(lang)
  langs = list(langs_set)
  langs.sort()
  
-def makeFooter(currentLang, currentPage):
-       text = "<p id=\"languages\">\n"
-       text += lang_other_langs[currentLang]
-       for i in range(len(langs)):
-               l = langs[i]
-               if (l == currentLang):
-                       continue
-               text += "<a href=\""
-               text += currentPage
-               if (not (l=="")):
-                       text += "." + l
-               text += ".html\">"
-               text += lang_lookup[l]
-               text += "</a>"
-               if (i < len(langs)-2):
-                       text += ", "
-               else:
-                       text += ".\n"
-       # TODO: add link to automatic language selection?
-       # still need to include this page in the new webpages somewhere
-       text += "</p>\n"
-       return text
-
  
+### helper functions
+def addLangExt(filename, lang, ext):
+    text = filename
+    if (not (lang=="")):
+        text += "." + lang
+    text += "." + ext
+    return text
+
+def makeFooter(filename, currentLang):
+    text = "<p id=\"languages\">\n"
+    text += lang_other_langs[currentLang]
+    for i in range(len(langs)):
+        lang = langs[i]
+        if (lang == currentLang):
+            continue
+        text += "<a href=\""
+       text += addLangExt(filename, lang, "html")
+        text += "\">"
+        text += lang_lookup[lang]
+        text += "</a>"
+        if (i < len(langs)-2):
+            text += ", "
+        else:
+            text += ".\n"
+    # TODO: add link to automatic language selection?
+    # still need to include this page in the new webpages somewhere
+    text += "</p>\n"
+    return text
+
+def getLocalHref(line):
+    match = re.search(r'href=[\'"]?([^\'" >]+)', line)
+    if match:
+        url = match.group(0)[6:]
+        if (url[0:7] == "http://"):
+            url = ''
+        # strip any '#'
+        omit = url.find('#')
+        if (omit >= 0):
+            url = url[0:omit]
+    else:
+        url = ''
+    return url
+
+
+
+
+### main loop
  for file in html_files:
-       file_split = file.split('.')
-       # we want to strip the .html
-       out_filename = os.path.basename(file_split[0])
-       if (len(file_split) == 2):
-               # it's English
-               lang = ''
-               # possibly necessary for automatic language selection
-               file_symlink =file.replace(".html", ".en.html")
-               if (not (os.path.exists(file_symlink))):
-                       os.symlink (file, file_symlink)
-       else:
-               # it's a translation
-               lang = file_split[1]
-       # it's a symlink
-       if (lang == "en"):
-               continue
-       out_filename += '.'+lang
-
-# I can't get the previous name to work
-       out_filename = os.path.basename(file)
-
-       # translation links should point to translations
-       lines = open(file).readlines()
-       # ick
-       os.remove(file)
-
-       # ick
-       lang_footer = makeFooter(lang, out_filename.split('.')[0])
-       
-       outfile = open( out_filename, 'w')
-       for line in lines:
-               # avoid external links
-               if ((line.find("href") >= 0) and (line.find("http")==-1)):
-# eventually we want to do this, but I can't get it to work.
-# waiting for help with apache (?)
-#                      line = line.replace(".html", "."+lang)
-                       text = ""
-                       if (not (lang=="")):
-                               text += "." + lang
-                       text += ".html"
-                       line = line.replace(".html", text)
-               if ((line.find("href") >= 0) and
-                   (line.find("http")==-1) and
-                   (line.find("pdf") >= 0)):
-                       text = ""
-                       if (not (lang=="")):
-                               text += "." + lang
-                       text += ".pdf"
-                       line = line.replace(".pdf", text)
-
-
-               if (line.find("<!-- FOOTER -->") >= 0):
-                       outfile.write( lang_footer )
-               outfile.write(line)
-       outfile.close()
+    ### we want to strip the .html and get the lang
+    file_split = file.split('.')
+    file_base = os.path.basename( file_split[0] )
+    if (len(file_split) == 2):
+        # it's English
+        lang = ''
+        # possibly necessary for automatic language selection
+        file_symlink = file.replace(".html", ".en.html")
+        if (not (os.path.exists(file_symlink))):
+            os.symlink (file, file_symlink)
+    elif (len(file_split) == 3):
+        # it's a translation
+        lang = file_split[1]
+        if (lang == "en"):
+            # it's a symlink
+            continue
+    else:
+        # it's a mess
+        print "is a mess"
+        continue
+
+    ### we need to replace parts of the file
+    lines = open(file).readlines()
+    os.remove(file)
+    outfile = open(file, 'w')
+
+    lang_footer = makeFooter(file_base, lang)
+
+
+    ### replace links as appropriate
+    for line in lines:
+        link = getLocalHref(line)
+        if (link != ""):
+            link_base = link.split('.')[0]
+            if (line.endswith(".html")):
+               langlink = addLangExt(link_base, lang, "html")
+                line.replace(link, langlink)
+            if (line.endswith(".pdf")):
+               langlink = addLangExt(link_base, lang, "pdf")
+        if (line.find("<!-- FOOTER -->") >= 0):
+            outfile.write( lang_footer )
+        outfile.write(line)
+    outfile.close()
author	Graham Percival <graham@percival-music.ca>
	Mon, 8 Mar 2010 20:39:50 +0000 (20:39 +0000)
committer	Graham Percival <graham@percival-music.ca>
	Mon, 8 Mar 2010 21:45:06 +0000 (21:45 +0000)