Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/mwlib/apps.py
diff options
context:
space:
mode:
Diffstat (limited to 'mwlib/apps.py')
-rw-r--r--mwlib/apps.py378
1 files changed, 378 insertions, 0 deletions
diff --git a/mwlib/apps.py b/mwlib/apps.py
new file mode 100644
index 0000000..55a427e
--- /dev/null
+++ b/mwlib/apps.py
@@ -0,0 +1,378 @@
+
+# Copyright (c) 2007-2008 PediaPress GmbH
+# See README.txt for additional licensing information.
+
+"""main programs - installed via setuptools' entry_points"""
+
+import optparse
+
+def buildcdb():
+ parser = optparse.OptionParser(usage="%prog --input XMLDUMP --output OUTPUT")
+ parser.add_option("-i", "--input", help="input file")
+ parser.add_option("-o", "--output", help="write output to OUTPUT")
+ options, args = parser.parse_args()
+
+ if args:
+ parser.error("too many arguments.")
+
+
+ input = options.input
+ output = options.output
+
+ if not (input and output):
+ parser.error("missing argument.")
+
+ import os
+ from mwlib import cdbwiki
+
+ cdbwiki.BuildWiki(input, output)()
+ open(os.path.join(output, "wikiconf.txt"), "w").write("""
+[wiki]
+type = cdb
+path = %s
+
+[images]
+type = download
+url = http://upload.wikimedia.org/wikipedia/commons/
+localpath = ~/images
+""" % (os.path.abspath(output),))
+
+def show():
+ parser = optparse.OptionParser(usage="%prog [-e|--expand] --conf CONF ARTICLE [...]")
+ parser.add_option("-c", "--conf", help="config file")
+ parser.add_option("-e", "--expand", action="store_true", help="expand templates")
+ parser.add_option("-t", "--template", action="store_true", help="show template")
+
+ options, args = parser.parse_args()
+
+ if not args:
+ parser.error("missing ARTICLE argument")
+
+ articles = [unicode(x, 'utf-8') for x in args]
+
+ conf = options.conf
+ if not conf:
+ parser.error("missing --conf argument")
+
+ from mwlib import wiki, expander
+
+ db = wiki.makewiki(conf)['wiki']
+
+ for a in articles:
+ if options.template:
+ raw=db.getTemplate(a)
+ else:
+ raw=db.getRawArticle(a)
+
+ if raw:
+ if options.expand:
+ te = expander.Expander(raw, pagename=a, wikidb=db)
+ raw = te.expandTemplates()
+
+ print raw.encode("utf-8")
+
+
+def buildzip():
+ parser = optparse.OptionParser(usage="%prog [OPTIONS] [ARTICLE ...]")
+ parser.add_option("-c", "--conf", help="config file (required unless --baseurl is given)")
+ parser.add_option("-b", "--baseurl", help="base URL for mwapidb backend")
+ parser.add_option("-s", "--shared-baseurl", help="base URL for shared images for mwapidb backend")
+ parser.add_option("-m", "--metabook", help="JSON encoded text file with book structure")
+ parser.add_option('--collectionpage', help='Title of a collection page')
+ parser.add_option("-x", "--noimages", action="store_true", help="exclude images")
+ parser.add_option("-o", "--output", help="write output to OUTPUT")
+ parser.add_option("-p", "--posturl", help="http post to POSTURL")
+ parser.add_option("-i", "--imagesize",
+ help="max. pixel size (width or height) for images (default: 800)")
+ parser.add_option("-d", "--daemonize", action="store_true",
+ help='become daemon after collection articles (before POST request)')
+ parser.add_option("-l", "--logfile", help="log to logfile")
+ parser.add_option("--license", help="Title of article containing full license text")
+ parser.add_option("--template-blacklist", help="Title of article containing blacklisted templates")
+ options, args = parser.parse_args()
+
+ import tempfile
+ import os
+ import zipfile
+
+ from mwlib import utils
+ from mwlib.utils import daemonize
+
+ articles = [unicode(x, 'utf-8') for x in args]
+
+ baseurl = options.baseurl
+ conf = options.conf
+ if not baseurl and not options.conf:
+ parser.error("neither --conf nor --baseurl specified\nuse --help for all options")
+
+ posturl = None
+ def post_status(status):
+ print 'status:', status
+ if not posturl:
+ return
+ try:
+ return urllib2.urlopen(posturl, urllib.urlencode({'status': status})).read()
+ except Exception, e:
+ print 'ERROR posting status %r to %r' % (status, posturl)
+
+ def post_progress(progress):
+ print 'progress', progress
+ if not posturl:
+ return
+ try:
+ return urllib2.urlopen(posturl, urllib.urlencode({'progress': int(progress)})).read()
+ except Exception, e:
+ print 'ERROR posting progress %r to %r' % (progress, posturl)
+
+ try:
+ if options.logfile:
+ utils.start_logging(options.logfile)
+
+ output = options.output
+
+ from mwlib import wiki, recorddb, metabook
+
+ mb = metabook.MetaBook()
+ if conf:
+ from ConfigParser import ConfigParser
+
+ w = wiki.makewiki(conf)
+ cp = ConfigParser()
+ cp.read(conf)
+ license = {
+ 'name': cp.get('wiki', 'defaultarticlelicense')
+ }
+ if license['name'] is not None:
+ license['wikitext'] = w['wiki'].getRawArticle(license['name'])
+ mb.source = {
+ 'name': cp.get('wiki', 'name'),
+ 'url': cp.get('wiki', 'url'),
+ 'defaultarticlelicense': license,
+ }
+ else:
+ w = {
+ 'wiki': wiki.wiki_mwapi(baseurl, options.license, options.template_blacklist),
+ 'images': wiki.image_mwapi(baseurl, shared_base_url=options.shared_baseurl)
+ }
+ metadata = w['wiki'].getMetaData()
+ mb.source = {
+ 'name': metadata['name'],
+ 'url': metadata['url'],
+ 'defaultarticlelicense': metadata['license'],
+ }
+
+ if options.noimages:
+ w['images'] = None
+ else:
+ if options.imagesize:
+ imagesize = int(options.imagesize)
+ else:
+ imagesize = 800
+
+ if output:
+ zipfilename = output
+ else:
+ fd, zipfilename = tempfile.mkstemp()
+ os.close(fd)
+
+ if options.collectionpage:
+ mwcollection = w['wiki'].getRawArticle(options.collectionpage)
+ mb.loadCollectionPage(mwcollection)
+ elif options.metabook:
+ mb.readJsonFile(options.metabook)
+
+ # do not daemonize earlier: Collection extension deletes input metabook file!
+ if options.daemonize:
+ daemonize()
+
+ posturl = options.posturl
+ if posturl:
+ posturl = posturl.encode('utf-8')
+
+ from mwlib.utils import get_multipart
+ import urllib
+ import urllib2
+
+ zf = zipfile.ZipFile(zipfilename, 'w')
+ z = recorddb.ZipfileCreator(zf, w['wiki'], w['images'])
+
+ post_status('parsing')
+
+ for x in articles:
+ z.addArticle(x)
+ mb.addArticles(articles)
+
+ z.addObject('metabook.json', mb.dumpJson())
+ articles = list(mb.getArticles())
+ if articles:
+ inc = 70/len(articles)
+ else:
+ inc = 0
+ p = 0
+ for title, revision in articles:
+ post_progress(p)
+ z.addArticle(title, revision=revision)
+ p += inc
+
+ post_status('packaging')
+
+ if not options.noimages:
+ z.writeImages(size=imagesize)
+
+ post_progress(80)
+
+ z.writeContent()
+ zf.close()
+
+ post_progress(90)
+
+ if posturl:
+ post_status('uploading')
+ zf = open(zipfilename, "rb")
+ ct, data = get_multipart('collection.zip', zf.read(), 'collection')
+ zf.close()
+ req = urllib2.Request(posturl, data=data, headers={"Content-Type": ct})
+ result = urllib2.urlopen(req).read()
+
+ if w['images']:
+ w['images'].clear()
+
+ if not output:
+ os.unlink(zipfilename)
+
+ post_status('finished')
+ post_progress(100)
+ except Exception, e:
+ post_status('error')
+ raise
+
+
+def parse():
+ parser = optparse.OptionParser(usage="%prog [-a|--all] --conf CONF [ARTICLE1 ...]")
+ parser.add_option("-a", "--all", action="store_true", help="parse all articles")
+ parser.add_option("--tb", action="store_true", help="show traceback on error")
+
+ parser.add_option("-c", "--conf", help="config file")
+
+ options, args = parser.parse_args()
+
+ if not args and not options.all:
+ parser.error("missing option.")
+
+ if not options.conf:
+ parser.error("missing --conf argument")
+
+ articles = [unicode(x, 'utf-8') for x in args]
+
+ conf = options.conf
+
+ import traceback
+ from mwlib import wiki, uparser
+
+ w = wiki.makewiki(conf)
+
+ db = w['wiki']
+
+ if options.all:
+ if not hasattr(db, "articles"):
+ raise RuntimeError("%s does not support iterating over all articles" % (db, ))
+ articles = db.articles()
+
+
+ import time
+ for x in articles:
+ try:
+ raw = db.getRawArticle(x)
+ # yes, raw can be None, when we have a redirect to a non-existing article.
+ if raw is None:
+ continue
+ stime=time.time()
+ a=uparser.parseString(x, raw=raw, wikidb=db)
+ except Exception, err:
+ print "F", repr(x), err
+ if options.tb:
+ traceback.print_exc()
+ else:
+ print "G", time.time()-stime, repr(x)
+
+def serve():
+ parser = optparse.OptionParser(usage="%prog --conf CONF ARTICLE [...]")
+ parser.add_option("-c", "--conf", help="config file")
+
+ options, args = parser.parse_args()
+
+
+ conf = options.conf
+ if not options.conf:
+ parser.error("missing --conf argument")
+
+ from mwlib import wiki, web
+
+ res = wiki.makewiki(conf)
+ db = res['wiki']
+ images = res['images']
+ from wsgiref.simple_server import make_server, WSGIServer
+
+ from SocketServer import ForkingMixIn
+ class MyServer(ForkingMixIn, WSGIServer):
+ pass
+
+ iface, port = '0.0.0.0', 8080
+ print "serving on %s:%s" % (iface, port)
+ http = make_server(iface, port, web.Serve(db, res['images']), server_class=MyServer)
+ http.serve_forever()
+
+
+
+def html():
+ parser = optparse.OptionParser(usage="%prog --conf CONF ARTICLE [...]")
+ parser.add_option("-c", "--conf", help="config file")
+
+ options, args = parser.parse_args()
+
+ if not args:
+ parser.error("missing ARTICLE argument")
+
+ articles = [unicode(x, 'utf-8') for x in args]
+
+ conf = options.conf
+ if not options.conf:
+ parser.error("missing --conf argument")
+
+ import StringIO
+ import tempfile
+ import os
+ import webbrowser
+ from mwlib import wiki, uparser, htmlwriter
+
+ res = wiki.makewiki(conf)
+ db = res['wiki']
+ images = res['images']
+
+ for a in articles:
+ raw=db.getRawArticle(a)
+ if not raw:
+ continue
+
+ out=StringIO.StringIO()
+ out.write("""<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset="utf-8"></meta>
+<link rel="stylesheet" href="pedia.css" />
+</head>
+<body>
+
+""")
+
+ a=uparser.parseString(x, raw=raw, wikidb=db)
+ w=htmlwriter.HTMLWriter(out, images)
+ w.write(a)
+
+ fd, htmlfile = tempfile.mkstemp(".html")
+ os.close(fd)
+ open(htmlfile, "wb").write(out.getvalue().encode('utf-8'))
+ webbrowser.open("file://"+htmlfile)
+
+