diff options
author | Martin Langhoff <martin@laptop.org> | 2010-12-01 17:29:30 (GMT) |
---|---|---|
committer | Martin Langhoff <martin@laptop.org> | 2010-12-01 17:29:30 (GMT) |
commit | 998e78ec3783909118045698237bf78db5eab892 (patch) | |
tree | 93d4a6c31c00b1ed21f9285904b409031313726d | |
parent | 60467acb8c65323e8b779cd6f8d58c5accf83858 (diff) |
Minor tweaks to expandtemplates and mergeupdates
-rwxr-xr-x | tools/expandtemplates.py | 9 | ||||
-rwxr-xr-x | tools/mergeupdates.py | 3 |
2 files changed, 8 insertions, 4 deletions
```diff
diff --git a/tools/expandtemplates.py b/tools/expandtemplates.py
index d02e3c1..6b8a760 100755
--- a/tools/expandtemplates.py
+++ b/tools/expandtemplates.py
@@ -88,6 +88,7 @@ class WPWikiDB:
         if article_text == None: # something's wrong
             return None
+        #sys.stderr.write("!!!%s!!!" % article_text)
         article_text = unicode(article_text, 'utf8')
         # To see unmodified article_text, uncomment here.
@@ -143,8 +144,8 @@ def load_db(dbname):
 # Cache articles and specially templates
 @lrudecorator(100)
 def wp_load_article(title):
-    #return wp.wp_load_article(title)
-    return wp_load_article_fork(title)
+    return wp.wp_load_article(title)
+    #return wp_load_article_fork(title)
 # Fork the wp lookup as a subprocess, so it can return None on error
 # wp.wp_load_article() exit(1)s on error .
@@ -186,9 +187,9 @@ index = ArticleIndex('%s.index.txt' % sys.argv[1])
 rawindex = index.rawindex()
 wikidb = WPWikiDB()
-rx = re.compile('Plantilla:')
+rx = re.compile('(Plantilla|Template|Wikipedia):')
-for title in rawindex:
+for title in rawindex: #['1812 invasion of Russia', '1857 revolt']: #rawindex:
     if rx.match(title):
         continue
diff --git a/tools/mergeupdates.py b/tools/mergeupdates.py
index cd118fe..9cb2a8c 100755
--- a/tools/mergeupdates.py
+++ b/tools/mergeupdates.py
@@ -17,6 +17,9 @@ def process_article(title, text):
     if title.lower() in blacklist:
         sys.stderr.write('Skipping %s\n' % title)
         return
+    if re.match('Wikipedia:', title):
+        sys.stderr.write('Skipping %s\n' % title)
+        return
     fpath = os.path.join(wikidir, title)
     if os.path.exists(fpath):
```