From 998e78ec3783909118045698237bf78db5eab892 Mon Sep 17 00:00:00 2001 From: Martin Langhoff Date: Wed, 01 Dec 2010 17:29:30 +0000 Subject: Minor tweaks to expandtemplates and mergeupdates --- diff --git a/tools/expandtemplates.py b/tools/expandtemplates.py index d02e3c1..6b8a760 100755 --- a/tools/expandtemplates.py +++ b/tools/expandtemplates.py @@ -88,6 +88,7 @@ class WPWikiDB: if article_text == None: # something's wrong return None + #sys.stderr.write("!!!%s!!!" % article_text) article_text = unicode(article_text, 'utf8') # To see unmodified article_text, uncomment here. @@ -143,8 +144,8 @@ def load_db(dbname): # Cache articles and specially templates @lrudecorator(100) def wp_load_article(title): - #return wp.wp_load_article(title) - return wp_load_article_fork(title) + return wp.wp_load_article(title) + #return wp_load_article_fork(title) # Fork the wp lookup as a subprocess, so it can return None on error # wp.wp_load_article() exit(1)s on error . @@ -186,9 +187,9 @@ index = ArticleIndex('%s.index.txt' % sys.argv[1]) rawindex = index.rawindex() wikidb = WPWikiDB() -rx = re.compile('Plantilla:') +rx = re.compile('(Plantilla|Template|Wikipedia):') -for title in rawindex: +for title in rawindex: #['1812 invasion of Russia', '1857 revolt']: #rawindex: if rx.match(title): continue diff --git a/tools/mergeupdates.py b/tools/mergeupdates.py index cd118fe..9cb2a8c 100755 --- a/tools/mergeupdates.py +++ b/tools/mergeupdates.py @@ -17,6 +17,9 @@ def process_article(title, text): if title.lower() in blacklist: sys.stderr.write('Skipping %s\n' % title) return + if re.match('Wikipedia:', title): + sys.stderr.write('Skipping %s\n' % title) + return fpath = os.path.join(wikidir, title) if os.path.exists(fpath): -- cgit v0.9.1