diff options
-rwxr-xr-x | mwlib/htmlwriter.py | 22 | ||||
-rwxr-xr-x | server.py | 191 | ||||
-rwxr-xr-x | tools/mergeupdates.py | 52 | ||||
-rwxr-xr-x | woip/sh/process-updates | 31 |
4 files changed, 262 insertions, 34 deletions
diff --git a/mwlib/htmlwriter.py b/mwlib/htmlwriter.py index a970c9c..d169482 100755 --- a/mwlib/htmlwriter.py +++ b/mwlib/htmlwriter.py @@ -348,28 +348,6 @@ class HTMLWriter(object): writeControl = writeText def writeArticle(self, a): - if a.caption: - self.out.write("<h1>") - self._write(a.caption) - self.out.write(' <font size="1">· <a class="offsite" ') - self.out.write('href="http://es.wikipedia.org/wiki/') - self._write(a.caption) - self.out.write('">De Wikipedia, la enciclopedia libre</a> ') - - # Report rendering problem. - self.out.write('· <a class="offsite" ') - self.out.write('href="http://pullcord.laptop.org:8000/render?q=') - self._write(a.caption) - self.out.write('">Haz clic aquí si esta página contiene errores de presentación</a> ') - - # Report inappropriate content. - self.out.write('· <a class="offsite" ') - self.out.write('href="http://pullcord.laptop.org:8000/report?q=') - self._write(a.caption) - self.out.write('">Esta página contiene material inapropiado</a>') - - self.out.write("</font>") - self.out.write('</h1>') for x in a: self.write(x) @@ -30,8 +30,10 @@ import codecs from StringIO import StringIO import BaseHTTPServer from SimpleHTTPServer import SimpleHTTPRequestHandler +import cgi import errno import urllib +import tempfile import re import wp import xml.dom.minidom @@ -82,7 +84,7 @@ class ArticleIndex: def __init__(self, path): self.article_index = set() - with open(path, 'r') as f: + with codecs.open(path, mode='r', encoding='utf-8') as f: for line in f.readlines(): m = re.search(r'(.*?)\s*\d+$', line) if m is None: @@ -109,7 +111,7 @@ class WPWikiDB: break article_text = unicode(wp.wp_load_article(title.encode('utf8')), 'utf8') - + # To see unmodified article_text, uncomment here. # print article_text @@ -402,8 +404,21 @@ class WPHTMLWriter(mwlib.htmlwriter.HTMLWriter): mwlib.htmlwriter.HTMLWriter.writeTagNode(self, t) class WikiRequestHandler(SimpleHTTPRequestHandler): - def __init__(self, index, request, client_address, server): + def __init__(self, index, conf, request, client_address, server): + # pullcord is currently offline + # self.reporturl = 'pullcord.laptop.org:8000' + self.reporturl = False self.index = index + self.port = conf['port'] + if conf.has_key('editdir'): + self.editdir = conf['editdir'] + else: + self.editdir = False + if conf.has_key('giturl'): + self.giturl = conf['giturl'] + else: + self.giturl = False + self.client_address = client_address SimpleHTTPRequestHandler.__init__( self, request, client_address, server) @@ -411,7 +426,10 @@ class WikiRequestHandler(SimpleHTTPRequestHandler): def get_wikitext(self, title): wikidb = WPWikiDB() article_text = wikidb.getRawArticle(title) - + edited = self.get_editedarticle(title) + if edited: + article_text = edited + # Pass ?override=1 in the url to replace wikitext for testing the renderer. if self.params.get('override', 0): override = codecs.open('override.txt', 'r', 'utf-8') @@ -419,7 +437,8 @@ class WikiRequestHandler(SimpleHTTPRequestHandler): override.close() # Pass ?noexpand=1 in the url to disable template expansion. - if not self.params.get('noexpand', 0): + if not self.params.get('noexpand', 0) \ + and not self.params.get('edit', 0): article_text = wikidb.expandArticle(article_text, title) return article_text @@ -458,6 +477,20 @@ class WikiRequestHandler(SimpleHTTPRequestHandler): self.end_headers() self.wfile.write(article_text.encode('utf8')) + elif self.params.get('edit', 0): + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + + self.wfile.write('<html><body><form method="POST">') + # self.wfile.write('User: <input type="text" size="30" name="user"><br />') + # self.wfile.write('Comment: <input type="text" size="100" name="comment"><br />') + self.wfile.write('<input type="submit" value="OK"><br />') + self.wfile.write('<textarea name="wmcontent" rows="40" cols="80" >') + htmlout = HTMLOutputBuffer() + htmlout.write(article_text.encode('utf8')) + self.wfile.write(htmlout.getvalue()) + self.wfile.write("</textarea></form></body></html>") else: htmlout = HTMLOutputBuffer() @@ -487,7 +520,45 @@ class WikiRequestHandler(SimpleHTTPRequestHandler): htmlout.write("</head>") htmlout.write("<body>") - + + htmlout.write("<h1>") + htmlout.write(title) + htmlout.write(' <font size="1">· <a class="offsite" ') + htmlout.write('href="http://es.wikipedia.org/wiki/') + htmlout.write(title) + htmlout.write('">De Wikipedia, la enciclopedia libre</a> ') + + if self.reporturl: + # Report rendering problem. + htmlout.write('· <a class="offsite" ') + htmlout.write('href="http://%s/render?q=' % self.reporturl) + htmlout.write(title) + htmlout.write('">Haz clic aquí si esta página contiene errores de presentación</a> ') + + # Report inappropriate content. + htmlout.write(' · <a class="offsite" ') + htmlout.write('href="http://%s/report?q=' % self.reporturl) + htmlout.write(title) + htmlout.write('">Esta página contiene material inapropiado</a>') + + if self.editdir: + htmlout.write(' · <a ') + htmlout.write('href="http://localhost:%s/wiki/' % self.port) + htmlout.write(title) + htmlout.write('?edit=true">[ Editar ]</a>') + htmlout.write(' · <a ') + htmlout.write('href="http://localhost:%s/wiki/' % self.port) + htmlout.write(title) + htmlout.write('?edit=true">[ Vista OK ]</a>') + if self.giturl: + htmlout.write(' · <a ') + htmlout.write('href="%s' % self.giturl) + htmlout.write(title) + htmlout.write('">[ Historial ]</a>') + + htmlout.write("</font>") + htmlout.write('</h1>') + self.write_wiki_html(htmlout, title, article_text) htmlout.write('<center>Contenido disponible bajo los términos de la <a href="/static/es-gfdl.html">Licencia de documentación libre de GNU</a>. <br/> Wikipedia es una marca registrada de la organización sin ánimo de lucro Wikimedia Foundation, Inc.<br/><a href="/static/acerca.html">Acerca de Wikipedia</a> </center>') @@ -505,6 +576,85 @@ class WikiRequestHandler(SimpleHTTPRequestHandler): print "FAILED to tidy '%s'" % title self.wfile.write(html) + + def do_POST(self): + + real_path = urllib.unquote(self.path) + real_path = unicode(real_path, 'utf8') + + (real_path, sep, param_text) = real_path.partition('?') + + # Wiki requests return article contents or redirect to Wikipedia. + m = re.match(r'^/wiki/(.+)$', real_path) + if self.editdir and m: + title = m.group(1) + + self._save_page(title) + + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + + htmlout = HTMLOutputBuffer() + htmlout.write(title.encode('utf8')) + + self.wfile.write('<html><body>Editado: ') + self.wfile.write('<a href="') + + self.wfile.write(htmlout.getvalue()) + self.wfile.write('">') + self.wfile.write(htmlout.getvalue()) + self.wfile.write('</body></html>') + + return + + # Any other request redirects to the index page. + self.send_response(301) + self.send_header("Location", "/static/") + self.end_headers() + + def _save_page(self, title): + formdata = cgi.FieldStorage(fp=self.rfile, + headers=self.headers, environ = {'REQUEST_METHOD':'POST'}, + keep_blank_values = 1) + + user = formdata.getfirst('user') + comment = formdata.getfirst('comment') + wmcontent = formdata.getfirst('wmcontent') + + # fix newlines + wmcontent = re.sub('\r', '', wmcontent) + + fpath = self.getfpath('wiki', title) + # UGLY: racy. + if not os.path.exists(fpath): + self._saveorig(title) + (fh, tmpfpath) = tempfile.mkstemp(dir=os.path.dirname(fpath)) + os.write(fh, wmcontent) + os.close(fh) + os.rename(tmpfpath, fpath) + + return True + + def getfpath(self, dir, title): + # may want to hash it + fpath = os.path.join(self.editdir, dir, title) + return fpath + + def _saveorig(self, title): + wikidb = WPWikiDB() + article_text = wikidb.getRawArticle(title) + fpath = self.getfpath('wiki.orig', title) + fh = codecs.open(fpath, 'w', encoding='utf-8') + fh.write(article_text) + fh.close() + + def get_editedarticle(self, title): + buf = None + fpath = self.getfpath('wiki', title) + if os.path.exists(fpath): + buf = codecs.open(fpath, 'r', encoding='utf-8').read() + return buf def send_searchresult(self, title): self.send_response(200) @@ -613,11 +763,21 @@ def load_db(dbname): dbname + '.locate.prefixdb', dbname + '.blocks.db') -def run_server(path, port): - index = ArticleIndex('%s.index.txt' % path) +def run_server(confvars): + index = ArticleIndex('%s.index.txt' % confvars['path']) - httpd = MyHTTPServer(('', port), - lambda *args: WikiRequestHandler(index, *args)) + if confvars.has_key('editdir'): + try: + for dir in ['wiki', 'wiki.orig']: + fdirpath = os.path.join(confvars['editdir'], dir) + if not os.path.exists(fdirpath): + os.mkdir(fdirpath) + except: + print "Error setting up directories:" + print "%s must be a writable directory" % confvars['editdir'] + + httpd = MyHTTPServer(('', confvars['port']), + lambda *args: WikiRequestHandler(index, confvars, *args)) if __name__ == '__main__': httpd.serve_forever() @@ -632,6 +792,13 @@ def run_server(path, port): if __name__ == '__main__': - load_db(sys.argv[1]) - run_server(sys.argv[1], int(sys.argv[2])) + conf = {'path': sys.argv[1], + 'port': int(sys.argv[2])} + if len(sys.argv) > 3: + conf['editdir'] = sys.argv[3] + if len(sys.argv) > 4: + conf['giturl'] = sys.argv[4] + + load_db(conf['path']) + run_server(conf) diff --git a/tools/mergeupdates.py b/tools/mergeupdates.py new file mode 100755 index 0000000..0462a47 --- /dev/null +++ b/tools/mergeupdates.py @@ -0,0 +1,52 @@ +#!/usr/bin/python + +import sys, re, os + +START_HEADING = chr(1) +START_TEXT = chr(2) +END_TEXT = chr(3) + +def process_article(title, text): + fpath = os.path.join(wikidir, title) + if os.path.exists(fpath): + sys.stderr.write('Merging %s\n' % fpath) + fc = open(fpath).read() + fc = re.sub('^\n+', '', fc) + fc = re.sub('\n+$', '', fc) + text = fc + sys.stdout.write(START_HEADING + '\n') + sys.stdout.write(title + '\n') + sys.stdout.write("%s\n" % len(text)) + sys.stdout.write(START_TEXT + '\n') + sys.stdout.write(text + '\n') + sys.stdout.write(END_TEXT + '\n') + +buf = '' +mode = 'title' +wikidir = os.path.join(sys.argv[1], 'wiki') +if not os.path.exists(wikidir): + print "Does not exist: " + wikidir + sys.exit(1) + +while True: + b = sys.stdin.read(1) + if not b: + break + if b == START_HEADING: + #sys.stderr.write('d start heading\n') + pass + elif b == START_TEXT: + buf = re.sub('^\n+', '', buf) + title = buf.split('\n')[0] + bytes = buf.split('\n')[1] + buf = '' + #sys.stderr.write('d start text\n') + elif b == END_TEXT: + buf = re.sub('^\n+', '', buf) + buf = re.sub('\n+$', '', buf) + process_article(title, buf) + buf = '' + title = '' + else: + buf += b + diff --git a/woip/sh/process-updates b/woip/sh/process-updates new file mode 100755 index 0000000..57258c5 --- /dev/null +++ b/woip/sh/process-updates @@ -0,0 +1,31 @@ +#!/bin/sh + +set -e + +if [ ! -n "$1" ]; then + echo "process <file.processed>" + exit 1 +else + file=`readlink -f $1` +fi + +# drop the trailing '.processed' +basename=${file%.processed} + +ifile="$basename.index.txt" +echo "Creating index..." +pushd woip/rb +ruby ./index.rb $file > $ifile + +sfile="$basename.locate.db" +echo "Creating locate index..." +cat $ifile | LC_ALL=C /usr/libexec/locate.mklocatedb > $sfile + +spfile="$basename.locate.prefixdb" +echo "Creating locate prefix index..." +../c/lsearcher -f $sfile -c $spfile -n + +bfile="$basename.blocks.db" +echo "Creating block index" +../c/bzipreader -f $ofile -l | awk '{print $2;}' | ../c/blocks $bfile + |