diff options
author | Martin Langhoff <martin@laptop.org> | 2010-11-29 20:01:40 (GMT) |
---|---|---|
committer | Martin Langhoff <martin@laptop.org> | 2010-11-29 20:01:40 (GMT) |
commit | b83caa388080cbef85f26286e253cf16c31a60e4 (patch) | |
tree | 2335fa3f8d570df4e636e81455b156fadfa892c4 | |
parent | 8338de77ce4f04395216e013c80e1d1d9013b86a (diff) |
Synchronized WPWikiDB class - server.py gets some fixes
-rwxr-xr-x | server.py | 13 | ||||
-rwxr-xr-x | tools/expandtemplates.py | 5 |
2 files changed, 13 insertions, 5 deletions
```diff
@@ -98,8 +98,11 @@ class ArticleIndex:
 class WPWikiDB:
     """Retrieves article contents for mwlib."""
 
-    def getRawArticle(self, title):
+    def getRawArticle(self, title, followRedirects=True):
         # Retrieve article text, recursively following #redirects.
+        if title == '':
+            return ''
+
         oldtitle = ""
         while True:
             # Replace underscores with spaces in title.
@@ -111,10 +114,16 @@ class WPWikiDB:
                 article_text = ""
                 break
 
-            article_text = unicode(wp_load_article(title.encode('utf8')), 'utf8')
+            article_text = wp_load_article(title.encode('utf8'))
+            if article_text == None:
+                # something's wrong
+                return None
+            article_text = unicode(article_text, 'utf8')
 
             # To see unmodified article_text, uncomment here.
             # print article_text
+            if not followRedirects:
+                break
 
             m = re.match(r'^\s*\#?redirect\s*\:?\s*\[\[(.*)\]\]', article_text, re.IGNORECASE|re.MULTILINE)
             if not m: break
diff --git a/tools/expandtemplates.py b/tools/expandtemplates.py
index aec43e2..d02e3c1 100755
--- a/tools/expandtemplates.py
+++ b/tools/expandtemplates.py
@@ -70,11 +70,10 @@ class WPWikiDB:
 
     def getRawArticle(self, title, followRedirects=True):
         # Retrieve article text, recursively following #redirects.
-        oldtitle = ""
-
         if title == '':
             return ''
-
+        
+        oldtitle = ""
         while True:
             # Replace underscores with spaces in title.
             title = title.replace("_", " ")
```