Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
path: root/tools
diff options
context:
space:
mode:
author Martin Langhoff <martin@laptop.org> 2010-12-08 17:14:15 (GMT)
committer Martin Langhoff <martin@laptop.org> 2010-12-08 17:14:15 (GMT)
commit 8f45d8db8820b3a30fd9981230d7a418266efe88 (patch)
tree 71df8878665f50a312d12b91a9927992167a1549 /tools
parent 6e8ec04ff35ee4bb2226d68df5ba02e8aa1446a0 (diff)
Language portability fixes to expandtemplates.py
Diffstat (limited to 'tools')
-rwxr-xr-x tools/expandtemplates.py 40
1 files changed, 31 insertions, 9 deletions
diff --git a/tools/expandtemplates.py b/tools/expandtemplates.py
index 6b8a760..58b3905 100755
--- a/tools/expandtemplates.py
+++ b/tools/expandtemplates.py
@@ -68,6 +68,11 @@ class ArticleIndex:
class WPWikiDB:
"""Retrieves article contents for mwlib."""
+ def __init__(self, lang, templateprefix, templateblacklist):
+ self.lang = lang
+ self.templateprefix = templateprefix
+ self.templateblacklist = templateblacklist
+
def getRawArticle(self, title, followRedirects=True):
# Retrieve article text, recursively following #redirects.
if title == '':
@@ -113,7 +118,10 @@ class WPWikiDB:
return self.getRawArticle(title)
def expandArticle(self, article_text, title):
- template_expander = expander.Expander(article_text, pagename=title, wikidb=self)
+ template_expander = expander.Expander(article_text, pagename=title,
+ wikidb=self, lang=self.lang,
+ templateprefix = self.templateprefix,
+ templateblacklist = self.templateblacklist)
return template_expander.expandTemplates()
def getExpandedArticle(self, title):
@@ -178,19 +186,33 @@ def wp_load_article_fork(title):
# __main__
-# prep a isting of redirects. wp.so hides them from
-# us, which would bloat our
-
-load_db(sys.argv[1])
+path = sys.argv[1]
+load_db(path)
index = ArticleIndex('%s.index.txt' % sys.argv[1])
rawindex = index.rawindex()
-wikidb = WPWikiDB()
-rx = re.compile('(Plantilla|Template|Wikipedia):')
-
-for title in rawindex: #['1812 invasion of Russia', '1857 revolt']: #rawindex:
+lang = os.path.basename(path)[0:2]
+## FIXME GETTEXT
+templateprefixes = { 'en': 'Template:',
+ 'es': 'Plantilla:' }
+templateprefix = templateprefixes[ lang ]
+
+# load blacklist only once
+templateblacklist = set()
+templateblacklistpath = os.path.join(os.path.dirname(path),
+ 'template_blacklist')
+if os.path.exists(templateblacklistpath):
+ with open(templateblacklistpath, 'r') as f:
+ for line in f.readlines():
+ templateblacklist.add(line.rstrip().decode('utf8'))
+
+wikidb = WPWikiDB(lang, templateprefix, templateblacklist)
+rx = re.compile('('+templateprefix+'|Wikipedia:)')
+
+for title in rawindex: #['1812 invasion of Russia', '1857 revolt']:
if rx.match(title):
+ sys.stderr.write('SKIPPING: ' + title + "\n")
continue
sys.stderr.write('PROCESSING: ' + title + "\n")