Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
diff options
context:
space:
mode:
-rwxr-xr-x   mwlib/htmlwriter.py   22
-rwxr-xr-x   server.py   191
-rwxr-xr-x   tools/mergeupdates.py   52
-rwxr-xr-x   woip/sh/process-updates   31
4 files changed, 262 insertions, 34 deletions
diff --git a/mwlib/htmlwriter.py b/mwlib/htmlwriter.py
index a970c9c..d169482 100755
--- a/mwlib/htmlwriter.py
+++ b/mwlib/htmlwriter.py
@@ -348,28 +348,6 @@ class HTMLWriter(object):
writeControl = writeText
def writeArticle(self, a):
- if a.caption:
- self.out.write("<h1>")
- self._write(a.caption)
- self.out.write(' <font size="1">&middot; <a class="offsite" ')
- self.out.write('href="http://es.wikipedia.org/wiki/')
- self._write(a.caption)
- self.out.write('">De Wikipedia, la enciclopedia libre</a> ')
-
- # Report rendering problem.
- self.out.write('&middot; <a class="offsite" ')
- self.out.write('href="http://pullcord.laptop.org:8000/render?q=')
- self._write(a.caption)
- self.out.write('">Haz clic aquí si esta página contiene errores de presentación</a> ')
-
- # Report inappropriate content.
- self.out.write('&middot; <a class="offsite" ')
- self.out.write('href="http://pullcord.laptop.org:8000/report?q=')
- self._write(a.caption)
- self.out.write('">Esta página contiene material inapropiado</a>')
-
- self.out.write("</font>")
- self.out.write('</h1>')
for x in a:
self.write(x)
diff --git a/server.py b/server.py
index a07c134..406d45b 100755
--- a/server.py
+++ b/server.py
@@ -30,8 +30,10 @@ import codecs
from StringIO import StringIO
import BaseHTTPServer
from SimpleHTTPServer import SimpleHTTPRequestHandler
+import cgi
import errno
import urllib
+import tempfile
import re
import wp
import xml.dom.minidom
@@ -82,7 +84,7 @@ class ArticleIndex:
def __init__(self, path):
self.article_index = set()
- with open(path, 'r') as f:
+ with codecs.open(path, mode='r', encoding='utf-8') as f:
for line in f.readlines():
m = re.search(r'(.*?)\s*\d+$', line)
if m is None:
@@ -109,7 +111,7 @@ class WPWikiDB:
break
article_text = unicode(wp.wp_load_article(title.encode('utf8')), 'utf8')
-
+
# To see unmodified article_text, uncomment here.
# print article_text
@@ -402,8 +404,21 @@ class WPHTMLWriter(mwlib.htmlwriter.HTMLWriter):
mwlib.htmlwriter.HTMLWriter.writeTagNode(self, t)
class WikiRequestHandler(SimpleHTTPRequestHandler):
- def __init__(self, index, request, client_address, server):
+ def __init__(self, index, conf, request, client_address, server):
+ # pullcord is currently offline
+ # self.reporturl = 'pullcord.laptop.org:8000'
+ self.reporturl = False
self.index = index
+ self.port = conf['port']
+ if conf.has_key('editdir'):
+ self.editdir = conf['editdir']
+ else:
+ self.editdir = False
+ if conf.has_key('giturl'):
+ self.giturl = conf['giturl']
+ else:
+ self.giturl = False
+
self.client_address = client_address
SimpleHTTPRequestHandler.__init__(
self, request, client_address, server)
@@ -411,7 +426,10 @@ class WikiRequestHandler(SimpleHTTPRequestHandler):
def get_wikitext(self, title):
wikidb = WPWikiDB()
article_text = wikidb.getRawArticle(title)
-
+ edited = self.get_editedarticle(title)
+ if edited:
+ article_text = edited
+
# Pass ?override=1 in the url to replace wikitext for testing the renderer.
if self.params.get('override', 0):
override = codecs.open('override.txt', 'r', 'utf-8')
@@ -419,7 +437,8 @@ class WikiRequestHandler(SimpleHTTPRequestHandler):
override.close()
# Pass ?noexpand=1 in the url to disable template expansion.
- if not self.params.get('noexpand', 0):
+ if not self.params.get('noexpand', 0) \
+ and not self.params.get('edit', 0):
article_text = wikidb.expandArticle(article_text, title)
return article_text
@@ -458,6 +477,20 @@ class WikiRequestHandler(SimpleHTTPRequestHandler):
self.end_headers()
self.wfile.write(article_text.encode('utf8'))
+ elif self.params.get('edit', 0):
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html; charset=utf-8")
+ self.end_headers()
+
+ self.wfile.write('<html><body><form method="POST">')
+ # self.wfile.write('User: <input type="text" size="30" name="user"><br />')
+ # self.wfile.write('Comment: <input type="text" size="100" name="comment"><br />')
+ self.wfile.write('<input type="submit" value="OK"><br />')
+ self.wfile.write('<textarea name="wmcontent" rows="40" cols="80" >')
+ htmlout = HTMLOutputBuffer()
+ htmlout.write(article_text.encode('utf8'))
+ self.wfile.write(htmlout.getvalue())
+ self.wfile.write("</textarea></form></body></html>")
else:
htmlout = HTMLOutputBuffer()
@@ -487,7 +520,45 @@ class WikiRequestHandler(SimpleHTTPRequestHandler):
htmlout.write("</head>")
htmlout.write("<body>")
-
+
+ htmlout.write("<h1>")
+ htmlout.write(title)
+ htmlout.write(' <font size="1">&middot; <a class="offsite" ')
+ htmlout.write('href="http://es.wikipedia.org/wiki/')
+ htmlout.write(title)
+ htmlout.write('">De Wikipedia, la enciclopedia libre</a> ')
+
+ if self.reporturl:
+ # Report rendering problem.
+ htmlout.write('&middot; <a class="offsite" ')
+ htmlout.write('href="http://%s/render?q=' % self.reporturl)
+ htmlout.write(title)
+ htmlout.write('">Haz clic aquí si esta página contiene errores de presentación</a> ')
+
+ # Report inappropriate content.
+ htmlout.write(' &middot; <a class="offsite" ')
+ htmlout.write('href="http://%s/report?q=' % self.reporturl)
+ htmlout.write(title)
+ htmlout.write('">Esta página contiene material inapropiado</a>')
+
+ if self.editdir:
+ htmlout.write(' &middot; <a ')
+ htmlout.write('href="http://localhost:%s/wiki/' % self.port)
+ htmlout.write(title)
+ htmlout.write('?edit=true">[ Editar ]</a>')
+ htmlout.write(' &middot; <a ')
+ htmlout.write('href="http://localhost:%s/wiki/' % self.port)
+ htmlout.write(title)
+ htmlout.write('?edit=true">[ Vista OK ]</a>')
+ if self.giturl:
+ htmlout.write(' &middot; <a ')
+ htmlout.write('href="%s' % self.giturl)
+ htmlout.write(title)
+ htmlout.write('">[ Historial ]</a>')
+
+ htmlout.write("</font>")
+ htmlout.write('</h1>')
+
self.write_wiki_html(htmlout, title, article_text)
htmlout.write('<center>Contenido disponible bajo los términos de la <a href="/static/es-gfdl.html">Licencia de documentación libre de GNU</a>. <br/> Wikipedia es una marca registrada de la organización sin ánimo de lucro Wikimedia Foundation, Inc.<br/><a href="/static/acerca.html">Acerca de Wikipedia</a> </center>')
@@ -505,6 +576,85 @@ class WikiRequestHandler(SimpleHTTPRequestHandler):
print "FAILED to tidy '%s'" % title
self.wfile.write(html)
+
+    def do_POST(self):
+        """Save an edited article posted to /wiki/<title>.
+
+        When an edit directory is configured and the request path has the
+        form /wiki/<title>, the submitted form is stored with _save_page()
+        and a small confirmation page linking to the article is sent back.
+        Every other POST is answered with a 301 redirect to /static/.
+        """
+        real_path = urllib.unquote(self.path)
+        real_path = unicode(real_path, 'utf8')
+
+        # Only the path part selects the article; the query string is unused.
+        real_path = real_path.partition('?')[0]
+
+        m = re.match(r'^/wiki/(.+)$', real_path)
+        if self.editdir and m:
+            title = m.group(1)
+
+            self._save_page(title)
+
+            self.send_response(200)
+            self.send_header("Content-Type", "text/html; charset=utf-8")
+            self.end_headers()
+
+            # The title is passed through HTMLOutputBuffer before it is
+            # placed in the page.
+            htmlout = HTMLOutputBuffer()
+            htmlout.write(title.encode('utf8'))
+            linktext = htmlout.getvalue()
+
+            self.wfile.write('<html><body>Editado: ')
+            self.wfile.write('<a href="')
+            self.wfile.write(linktext)
+            self.wfile.write('">')
+            self.wfile.write(linktext)
+            # Close the anchor so the rest of the page is not part of the link.
+            self.wfile.write('</a>')
+            self.wfile.write('</body></html>')
+
+            return
+
+        # Any other request redirects to the index page.
+        self.send_response(301)
+        self.send_header("Location", "/static/")
+        self.end_headers()
+
+    def _save_page(self, title):
+        """Store the wikitext submitted in the current POST request.
+
+        The 'wmcontent' form field is written to the file for `title` in the
+        'wiki' subdirectory of the edit directory.  Before an article is
+        saved for the first time, its unedited text is preserved with
+        _saveorig().  The data goes to a temporary file in the target
+        directory first and is then renamed into place.
+
+        Returns True.  Raises ValueError when the form has no 'wmcontent'
+        field.
+        """
+        formdata = cgi.FieldStorage(fp=self.rfile,
+                                    headers=self.headers,
+                                    environ={'REQUEST_METHOD': 'POST'},
+                                    keep_blank_values=1)
+
+        # Only the article text is used; 'user' and 'comment' were read
+        # here before but never stored anywhere.
+        wmcontent = formdata.getfirst('wmcontent')
+        if wmcontent is None:
+            raise ValueError("POST data has no 'wmcontent' field")
+
+        # fix newlines
+        wmcontent = wmcontent.replace('\r', '')
+
+        fpath = self.getfpath('wiki', title)
+        # UGLY: racy.
+        if not os.path.exists(fpath):
+            self._saveorig(title)
+
+        (fh, tmpfpath) = tempfile.mkstemp(dir=os.path.dirname(fpath))
+        try:
+            try:
+                os.write(fh, wmcontent)
+            finally:
+                # Always release the descriptor, even if the write failed.
+                os.close(fh)
+            os.rename(tmpfpath, fpath)
+        except Exception:
+            # Do not leave a stray temporary file behind on failure.
+            if os.path.exists(tmpfpath):
+                os.remove(tmpfpath)
+            raise
+
+        return True
+
+    def getfpath(self, dir, title):
+        """Return the path of the file that stores `title` under `dir`.
+
+        `dir` names a subdirectory of self.editdir ('wiki' or 'wiki.orig'
+        in this class).  `title` is taken from the request URL, so a title
+        that would resolve outside that subdirectory (an absolute path or
+        '..' components) is rejected.
+
+        Raises ValueError for such titles.
+        """
+        # may want to hash it
+        basedir = os.path.join(self.editdir, dir)
+        fpath = os.path.join(basedir, title)
+        # Compare normalized absolute paths: os.path.join() alone lets an
+        # absolute or '..'-laden title escape the edit directory.
+        root = os.path.abspath(basedir)
+        if not os.path.abspath(fpath).startswith(root + os.sep):
+            raise ValueError('invalid article title: %r' % (title,))
+        return fpath
+
+    def _saveorig(self, title):
+        """Write the raw database text of `title` to the 'wiki.orig' directory.
+
+        The text is fetched with WPWikiDB.getRawArticle() and stored UTF-8
+        encoded at the path getfpath('wiki.orig', title) returns.
+        """
+        wikidb = WPWikiDB()
+        article_text = wikidb.getRawArticle(title)
+        fpath = self.getfpath('wiki.orig', title)
+        # The context manager closes the file even if the write fails.
+        with codecs.open(fpath, 'w', encoding='utf-8') as fh:
+            fh.write(article_text)
+
+    def get_editedarticle(self, title):
+        """Return the locally edited text of `title`, or None.
+
+        None is returned when no edit directory is configured or when no
+        edited copy of the article exists.  The file is decoded as UTF-8.
+        """
+        # self.editdir is False when the server runs without an edit
+        # directory; building a path from it would raise.
+        if not self.editdir:
+            return None
+        fpath = self.getfpath('wiki', title)
+        if not os.path.exists(fpath):
+            return None
+        with codecs.open(fpath, 'r', encoding='utf-8') as fh:
+            return fh.read()
def send_searchresult(self, title):
self.send_response(200)
@@ -613,11 +763,21 @@ def load_db(dbname):
dbname + '.locate.prefixdb',
dbname + '.blocks.db')
-def run_server(path, port):
- index = ArticleIndex('%s.index.txt' % path)
+def run_server(confvars):
+ index = ArticleIndex('%s.index.txt' % confvars['path'])
- httpd = MyHTTPServer(('', port),
- lambda *args: WikiRequestHandler(index, *args))
+ if confvars.has_key('editdir'):
+ try:
+ for dir in ['wiki', 'wiki.orig']:
+ fdirpath = os.path.join(confvars['editdir'], dir)
+ if not os.path.exists(fdirpath):
+ os.mkdir(fdirpath)
+ except:
+ print "Error setting up directories:"
+ print "%s must be a writable directory" % confvars['editdir']
+
+ httpd = MyHTTPServer(('', confvars['port']),
+ lambda *args: WikiRequestHandler(index, confvars, *args))
if __name__ == '__main__':
httpd.serve_forever()
@@ -632,6 +792,13 @@ def run_server(path, port):
if __name__ == '__main__':
- load_db(sys.argv[1])
- run_server(sys.argv[1], int(sys.argv[2]))
+ conf = {'path': sys.argv[1],
+ 'port': int(sys.argv[2])}
+ if len(sys.argv) > 3:
+ conf['editdir'] = sys.argv[3]
+ if len(sys.argv) > 4:
+ conf['giturl'] = sys.argv[4]
+
+ load_db(conf['path'])
+ run_server(conf)
diff --git a/tools/mergeupdates.py b/tools/mergeupdates.py
new file mode 100755
index 0000000..0462a47
--- /dev/null
+++ b/tools/mergeupdates.py
@@ -0,0 +1,97 @@
+#!/usr/bin/python
+"""Merge locally edited articles into a processed article stream.
+
+Usage: mergeupdates.py <editdir> < input > output
+
+The stream read from stdin frames each article with three control
+characters: chr(1) starts the heading (title line, then length line),
+chr(2) starts the text and chr(3) ends it.  Every article is written to
+stdout in the same framing; when <editdir>/wiki contains a file named
+after the article title, the contents of that file replace the text.
+"""
+
+import os
+import re
+import sys
+
+START_HEADING = chr(1)
+START_TEXT = chr(2)
+END_TEXT = chr(3)
+
+# Splits a chunk on the framing characters and keeps them as tokens.
+MARKER_RE = re.compile('([%s%s%s])' % (START_HEADING, START_TEXT, END_TEXT))
+
+# Number of bytes requested from the input per read() call.
+CHUNK_SIZE = 65536
+
+# Directory holding the edited articles; assigned by main().
+wikidir = None
+
+
+def process_article(title, text):
+    """Write one framed article to stdout, preferring an edited copy.
+
+    If a file named `title` exists in `wikidir`, its contents (without
+    leading and trailing newlines) are written instead of `text`.  The
+    length line always describes the text that is actually written.
+    """
+    fpath = os.path.join(wikidir, title)
+    if os.path.exists(fpath):
+        sys.stderr.write('Merging %s\n' % fpath)
+        fh = open(fpath)
+        try:
+            text = fh.read().strip('\n')
+        finally:
+            fh.close()
+    sys.stdout.write(START_HEADING + '\n')
+    sys.stdout.write(title + '\n')
+    sys.stdout.write("%s\n" % len(text))
+    sys.stdout.write(START_TEXT + '\n')
+    sys.stdout.write(text + '\n')
+    sys.stdout.write(END_TEXT + '\n')
+
+
+def merge_stream(instream):
+    """Re-emit every article found in `instream` via process_article()."""
+    title = ''
+    pending = []  # text fragments collected since the last START_TEXT/END_TEXT
+    while True:
+        # Read in blocks; one read() call per byte is needlessly slow.
+        chunk = instream.read(CHUNK_SIZE)
+        if not chunk:
+            break
+        for token in MARKER_RE.split(chunk):
+            if token == START_HEADING:
+                # Nothing to do: the heading is parsed once START_TEXT shows up.
+                pass
+            elif token == START_TEXT:
+                heading = ''.join(pending).lstrip('\n')
+                # First heading line is the title; the length line that
+                # follows is recomputed on output, so it is not needed here.
+                title = heading.split('\n')[0]
+                pending = []
+            elif token == END_TEXT:
+                process_article(title, ''.join(pending).strip('\n'))
+                pending = []
+                title = ''
+            elif token:
+                pending.append(token)
+
+
+def main(argv):
+    """Validate the command line and run the merge; return the exit status."""
+    global wikidir
+    if len(argv) < 2:
+        sys.stderr.write('Usage: %s <editdir> < input > output\n' % argv[0])
+        return 1
+    wikidir = os.path.join(argv[1], 'wiki')
+    if not os.path.exists(wikidir):
+        # Diagnostics go to stderr; stdout carries the article stream.
+        sys.stderr.write('Does not exist: %s\n' % wikidir)
+        return 1
+    merge_stream(sys.stdin)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/woip/sh/process-updates b/woip/sh/process-updates
new file mode 100755
index 0000000..57258c5
--- /dev/null
+++ b/woip/sh/process-updates
@@ -0,0 +1,43 @@
+#!/bin/sh
+# Build the index files that accompany a <name>.processed article dump.
+#
+# Usage: process-updates <file.processed>
+#
+# Must be run from the top of the source tree: the helpers are reached
+# through the relative paths woip/rb and ../c.
+
+set -e
+
+if [ -z "$1" ]; then
+    echo "Usage: process-updates <file.processed>" >&2
+    exit 1
+fi
+
+# Resolve to an absolute path now; the script changes directory below.
+file=$(readlink -f "$1")
+
+# NOTE(review): ofile was never assigned in this script, so bzipreader
+# used to receive "-f -l".  Until the intended input is known (presumably
+# the bzip2-compressed dump -- confirm), it must come from the environment.
+: "${ofile:?ofile must name the file to pass to bzipreader -f}"
+
+# drop the trailing '.processed'
+basename=${file%.processed}
+
+ifile="$basename.index.txt"
+echo "Creating index..."
+# pushd is a bash builtin and nothing ever popped it; plain cd works in sh.
+cd woip/rb
+ruby ./index.rb "$file" > "$ifile"
+
+sfile="$basename.locate.db"
+echo "Creating locate index..."
+LC_ALL=C /usr/libexec/locate.mklocatedb < "$ifile" > "$sfile"
+
+spfile="$basename.locate.prefixdb"
+echo "Creating locate prefix index..."
+../c/lsearcher -f "$sfile" -c "$spfile" -n
+
+bfile="$basename.blocks.db"
+echo "Creating block index"
+../c/bzipreader -f "$ofile" -l | awk '{print $2;}' | ../c/blocks "$bfile"