Diffstat (limited to 'server.py')
-rwxr-xr-x  server.py  993
1 file changed, 993 insertions, 0 deletions
diff --git a/server.py b/server.py
new file mode 100755
index 0000000..14c0827
--- /dev/null
+++ b/server.py
@@ -0,0 +1,993 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2007, One Laptop Per Child
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+# Web server script for the Wikiserver project.
+#
+# This module is not run directly; the per-language activity script
+# (e.g. activity_es.py) builds the configuration and calls run_server().
+#
+## Standard libs
+from __future__ import with_statement
+import logging
+import sys
+import os
+import platform
+import select
+import codecs
+import BaseHTTPServer
+from SimpleHTTPServer import SimpleHTTPRequestHandler
+import SocketServer
+import socket
+
+import cgi
+import errno
+import urllib
+import tempfile
+import re
+try:
+ from hashlib import md5
+except ImportError:
+ from md5 import md5
+
+import dataretriever
+import pylru
+import simplejson
+
+##
+## Libs we ship -- add lib path for
+## shared objects
+##
+_root_path = os.path.dirname(__file__)
+# linux32_27" for Linux 32bits Python 2.7
+system_id = "%s%s" % (platform.system().lower(),
+ platform.architecture()[0][0:2])
+if platform.processor().startswith('arm'):
+ system_id = platform.processor()
+
+platform_dir = "%s_%s%s" % (system_id,
+ sys.version_info[0], # major
+ sys.version_info[1]) # minor
+
+sys.path.append(os.path.join(_root_path, 'binarylibs', platform_dir))
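+# Illustration of the resulting path (values are examples, not guaranteed):
+# on 32-bit x86 Linux with Python 2.7 the directory appended above is
+#     <root>/binarylibs/linux32_27
+# while on an ARM machine, where platform.processor() reports something
+# like "armv7l", it would be
+#     <root>/binarylibs/armv7l_27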
+
+import mwlib.htmlwriter
+from mwlib import parser, scanner, expander
+
+# Uncomment to print out a large dump from the template expander.
+#os.environ['DEBUG_EXPANDER'] = '1'
+
+
+class MyHTTPServer(BaseHTTPServer.HTTPServer):
+ def serve_forever(self, poll_interval=0.5):
+        """Overridden version of BaseServer.serve_forever that keeps
+        working when EINTR is received.
+ """
+ self._BaseServer__serving = True
+ self._BaseServer__is_shut_down.clear()
+ while self._BaseServer__serving:
+
+ # XXX: Consider using another file descriptor or
+ # connecting to the socket to wake this up instead of
+ # polling. Polling reduces our responsiveness to a
+ # shutdown request and wastes cpu at all other times.
+ try:
+ r, w, e = select.select([self], [], [], poll_interval)
+ except select.error, e:
+ if e[0] == errno.EINTR:
+ logging.debug("got eintr")
+ continue
+ raise
+ if r:
+ self._handle_request_noblock()
+ self._BaseServer__is_shut_down.set()
+
+ def server_bind(self):
+        """Override server_bind in HTTPServer to avoid using getfqdn to get
+        the server name, because it is very slow."""
+ SocketServer.TCPServer.server_bind(self)
+ host, port = self.socket.getsockname()[:2]
+ self.server_name = 'localhost'
+ self.server_port = port
+
+
+class WPWikiDB:
+ """Retrieves article contents for mwlib."""
+
+ def __init__(self, path, lang, templateprefix, templateblacklist):
+ self.lang = lang
+ self.templateprefix = templateprefix
+ self.templateblacklist = templateblacklist
+ self.dataretriever = dataretriever.DataRetriever(system_id, path)
+ self.templates_cache = {'!' : '|', u'!': '|'} # a special case
+
+ def getRawArticle(self, title, followRedirects=True):
+
+ # Retrieve article text, recursively following #redirects.
+ if title == '':
+ return ''
+
+ article_text = \
+ self.dataretriever.get_text_article(title).decode('utf-8')
+
+        # Stripping leading & trailing whitespace fixes template expansion.
+        article_text = article_text.strip()
+
+ return article_text
+
+ def getTemplate(self, title, followRedirects=False):
+ if title in self.templates_cache:
+ return self.templates_cache[title]
+ else:
+ try:
+ template_content = self.getRawArticle(title)
+ # check recursion in templates
+ template_name = title[title.find(':') + 1:]
+
+                # Remove <noinclude> sections because expandTemplates does
+                # not detect them and may follow recursive inclusions.
+ lower_content = template_content.lower()
+ start_noinclude = lower_content.find('<noinclude>')
+ while start_noinclude > -1:
+ end_noinclude = lower_content.find('</noinclude>')
+ content = template_content[:start_noinclude]
+ if end_noinclude > -1:
+ content = content + template_content[end_noinclude + \
+ len('</noinclude>'):]
+ template_content = content
+ lower_content = template_content.lower()
+ start_noinclude = lower_content.find('<noinclude>')
+
+ if re.search('{{' + template_name, template_content, \
+ re.IGNORECASE) is not None:
+ logging.error("Found recursion template %s" % title)
+                    # IGNORECASE must be a compile flag, not re.sub's count.
+                    name_pattern = re.compile(template_name, re.IGNORECASE)
+                    template_content = name_pattern.sub('_not_found_',
+                                                        template_content)
+
+ # Search again
+ if re.search('{{' + template_name, template_content, \
+ re.IGNORECASE) is not None:
+ template_content = ''
+
+ except:
+ template_content = ''
+
+ self.templates_cache[title] = template_content
+ return template_content
+
+ def expandArticle(self, article_text, title):
+ template_expander = expander.Expander(article_text, pagename=title,
+ wikidb=self, lang=self.lang,
+ templateprefix=self.templateprefix,
+ templateblacklist=self.templateblacklist)
+ expanded_article = template_expander.expandTemplates()
+
+ return expanded_article
+
+ def getExpandedArticle(self, title):
+ return self.expandArticle(self.getRawArticle(title), title)
+
+
+class WPImageDB:
+ """Retrieves images for mwlib."""
+ def __init__(self, basepath):
+ self.basepath = basepath
+
+ def hashpath(self, name):
+ name = name.replace(' ', '_')
+ name = name[:1].upper() + name[1:]
+ d = md5(name.encode('utf-8')).hexdigest()
+ return "/".join([d[0], d[:2], name])
+
+ def hashpath_dir(self, name):
+ name = name.replace(' ', '_')
+ name = name[:1].upper() + name[1:]
+ d = md5(name.encode('utf-8')).hexdigest()
+ return "/".join([d[0], d[:2]])
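+    # Sketch of the hashing scheme implemented by the two methods above
+    # (hypothetical digest): for a name whose md5 hex digest starts with
+    # "ab", hashpath("My image.jpg") returns "a/ab/My_image.jpg" and
+    # hashpath_dir("My image.jpg") returns "a/ab", mirroring the directory
+    # layout of the local image dump.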
+
+ def getPath(self, name, size=None):
+ hashed_name = self.hashpath(name).encode('utf8')
+ path = self.basepath + '/%s' % hashed_name
+ return path
+
+ def getURL(self, name, size=None):
+ hashed_name = self.hashpath(name).encode('utf8')
+ if size is not None:
+ file_name = self.basepath + self.hashpath_dir(name) + '/' + \
+ ('%dpx-' % size) + name.replace(' ', '_')
+ else:
+ file_name = self.basepath + self.hashpath_dir(name) + '/' + \
+ name.replace(' ', '_')
+
+ if os.path.exists(file_name):
+ url = '/' + file_name
+ else:
+ if size is None:
+ url = 'http://upload.wikimedia.org/wikipedia/commons/' + \
+ hashed_name
+ else:
+ url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/' \
+ + hashed_name + ('/%dpx-' % size) + name.replace(' ', '_')
+ if re.match(r'.*\.svg$', url, re.IGNORECASE):
+ url = url + '.png'
+
+ #print "getUrl: %s -> %s" % (name.encode('utf8'), url.encode('utf8'))
+ return url
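+    # Example of the fallback above (hypothetical file name): for "Foo.svg"
+    # with size=180, getURL returns the local "<basepath>.../180px-Foo.svg"
+    # path if that thumbnail exists on disk, and otherwise an
+    # upload.wikimedia.org thumb URL with ".png" appended, since SVG thumbs
+    # are served as rendered PNGs.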
+
+
+class HTMLOutputBuffer:
+ """Buffers output and converts to utf8 as needed."""
+
+ def __init__(self):
+ self.buffer = ''
+
+ def write(self, obj):
+ if isinstance(obj, unicode):
+ self.buffer += obj.encode('utf8')
+ else:
+ self.buffer += obj
+
+ def getvalue(self):
+ return self.buffer
+
+
+class WPMathRenderer:
+
+ def __init__(self, html_writer):
+ self.writer = html_writer
+
+ def render(self, latex):
+ logging.debug("MathRenderer %s" % latex)
+ latex = latex.replace('\f', '\\f')
+ latex = latex.replace('\t', '\\t')
+        # \bold causes an error
+ latex = latex.replace('\\bold', '')
+
+        # Postpone rendering; MathJax processes this on the client side.
+ mathml = '<script type="math/tex">' + latex + '</script>'
+ self.writer.math_processed = True
+ return mathml
+
+
+class WPHTMLWriter(mwlib.htmlwriter.HTMLWriter):
+ """Customizes HTML output from mwlib."""
+
+ def __init__(self, dataretriever, wfile, images=None, lang='en'):
+ self.dataretriever = dataretriever
+ self.gallerylevel = 0
+ self.lang = lang
+ self.math_processed = False
+ self.links_list = []
+
+ math_renderer = WPMathRenderer(self)
+ mwlib.htmlwriter.HTMLWriter.__init__(self, wfile, images,
+ math_renderer=math_renderer)
+
+ def writeLink(self, obj):
+ if obj.target is None:
+ return
+
+ article = obj.target
+ #print "writeLink", article, obj.caption
+ if article.startswith('#'):
+ #print "----> <a href='%s'>" % article
+ self.out.write("<a href='%s'>" % article)
+ else:
+
+            # The parser appends '/' characters to link targets for some
+            # reason; strip them.
+ article = article.rstrip('/')
+
+ title = article
+ title = title[0].capitalize() + title[1:]
+ title = title.replace("_", " ")
+ self.links_list.append(article)
+
+ parts = article.encode('utf-8').split('#')
+ parts[0] = parts[0].replace(" ", "_")
+            url = "#".join(parts)
+
+ self.out.write("<a href='/wiki/%s'>" % url)
+
+ if obj.children:
+ for x in obj.children:
+ self.write(x)
+ else:
+ self._write(obj.target)
+
+ self.out.write("</a>")
+
+ def writeImageLink(self, obj):
+ if self.images is None:
+ return
+
+ width = obj.width
+ height = obj.height
+
+ is_svg = re.match(r'.*\.svg$', obj.target, re.IGNORECASE)
+ is_thumb = obj.thumb or obj.frame or (self.gallerylevel > 0)
+
+ if (width or height) or is_thumb:
+ max_length = max(width, height)
+ if obj.thumb:
+ max_length = 180
+ if self.gallerylevel > 0:
+ max_length = 120
+ path = self.images.getPath(obj.target, size=max_length)
+ url_thumb = self.images.getURL(obj.target, size=max_length)
+ url = self.images.getURL(obj.target)
+ else:
+ path = self.images.getPath(obj.target)
+ url_thumb = self.images.getURL(obj.target)
+ url = url_thumb
+
+ if url_thumb is None:
+ return
+
+        # The following HTML generation code is copied closely from InstaView,
+        # which approximates the nest of <div> tags needed to render images
+        # roughly correctly.
+        # It has also been extended to support Gallery tags.
+ if self.imglevel == 0:
+ self.imglevel += 1
+
+ align = obj.align
+ thumb = obj.thumb
+ frame = obj.frame
+ caption = obj.caption
+
+ # SVG images must be included using <object data=''> rather than
+ # <img src=''>.
+ if re.match(r'.*\.svg$', url_thumb, re.IGNORECASE):
+ tag = 'object'
+ ref = 'data'
+ else:
+ tag = 'img'
+ ref = 'src'
+
+ # Hack to get galleries to look okay, in the absence of image
+ # dimensions.
+ if self.gallerylevel > 0:
+ width = 120
+
+ if thumb and not width:
+ width = 180 # FIXME: This should not be hardcoded
+
+ attr = ''
+ if width:
+ attr += 'width="%d" ' % width
+
+ img = '<%(tag)s %(ref)s="%(url)s" longdesc="%(cap)s" %(att)s>' % \
+ {'tag': tag, 'ref': ref, 'url': url_thumb, 'cap': caption,
+ 'att': attr} + '</%(tag)s>' % {'tag': tag}
+
+ center = False
+ if align == 'center':
+ center = True
+ align = None
+
+ if center:
+ self.out.write('<div class="center">')
+
+ if self.gallerylevel > 0:
+ self.out.write('<div class="gallerybox" ' +
+ 'style="width: 155px;">')
+
+ self.out.write('<div class="thumb" ' +
+ 'style="padding: 13px 0; width: 150px;">')
+ self.out.write('<div style="margin-left: auto; ' +
+ 'margin-right: auto; width: 120px;">')
+ self.out.write('<a href="%s" class="image" title="%s">' %
+ (url, caption))
+ self.out.write(img)
+ self.out.write('</a>')
+ self.out.write('</div>')
+ self.out.write('</div>')
+
+ self.out.write('<div class="gallerytext">')
+ self.out.write('<p>')
+ for x in obj.children:
+ self.write(x)
+ self.out.write('</p>')
+ self.out.write('</div>')
+
+ self.out.write('</div>')
+ elif frame or thumb:
+ if not align:
+ align = "right"
+ self.out.write('<div class="thumb t%s">' % align)
+
+ if not width:
+ width = 180 # default thumb width
+ self.out.write('<div style="width:%dpx;">' % (int(width) + 2))
+
+ if thumb:
+ self.out.write(img)
+ self.out.write('<div class="thumbcaption">')
+ self.out.write('<div class="magnify" style="float:right">')
+ self.out.write('<a href="%s" class="internal" ' % url +
+ 'title="Enlarge">')
+ self.out.write('<img src="/static/magnify-clip.png">' +
+ '</img>')
+ self.out.write('</a>')
+ self.out.write('</div>')
+ for x in obj.children:
+ self.write(x)
+ self.out.write('</div>')
+ else:
+ self.out.write(img)
+ self.out.write('<div class="thumbcaption">')
+ for x in obj.children:
+ self.write(x)
+ self.out.write('</div>')
+
+ self.out.write('</div>')
+ self.out.write('</div>')
+ elif align:
+ self.out.write('<div class="float%s">' % align)
+ self.out.write(img)
+ self.out.write('</div>')
+ else:
+ self.out.write(img)
+
+ if center:
+ self.out.write('</div>')
+
+ self.imglevel -= 1
+ else:
+ self.out.write('<a href="%s">' % url.encode('utf8'))
+
+ for x in obj.children:
+ self.write(x)
+
+ self.out.write('</a>')
+
+ def writeTagNode(self, t):
+ if t.caption == 'gallery':
+ self.out.write('<table class="gallery" cellspacing="0" ' +
+ 'cellpadding="0">')
+
+ self.gallerylevel += 1
+
+ # TODO: More than one row.
+ self.out.write('<tr>')
+
+ for x in t.children:
+ self.out.write('<td>')
+ self.write(x)
+ self.out.write('</td>')
+
+ self.out.write('</tr>')
+
+ self.gallerylevel -= 1
+
+ self.out.write('</table>')
+ else:
+ # All others handled by base class.
+ mwlib.htmlwriter.HTMLWriter.writeTagNode(self, t)
+
+
+class WikiRequestHandler(SimpleHTTPRequestHandler):
+ def __init__(self, wikidb, conf, links_cache, request, client_address,
+ server):
+ # pullcord is currently offline
+ # self.reporturl = 'pullcord.laptop.org:8000'
+ self.reporturl = False
+ self.port = conf['port']
+ self.lang = conf['lang']
+ self.templateprefix = conf['templateprefix']
+ self.templateblacklist = set(conf['templateblacklist'])
+ self.wpheader = conf['wpheader']
+ self.wpfooter = conf['wpfooter']
+ self.resultstitle = conf['resultstitle']
+ self.base_path = os.path.dirname(conf['path'])
+ self.links_cache = links_cache
+
+ if 'editdir' in conf:
+ self.editdir = conf['editdir']
+ else:
+ self.editdir = False
+ if 'giturl' in conf:
+ self.giturl = conf['giturl']
+ else:
+ self.giturl = False
+
+ self.wikidb = wikidb
+
+ self.client_address = client_address
+
+ SimpleHTTPRequestHandler.__init__(
+ self, request, client_address, server)
+
+ def get_wikitext(self, title):
+ article_text = self.wikidb.getRawArticle(title)
+ #print article_text
+ if self.editdir:
+ edited = self.get_editedarticle(title)
+ if edited:
+ article_text = edited
+
+ # Pass ?override=1 in the url to replace wikitext for testing
+ # the renderer.
+ if self.params.get('override', 0):
+ override = codecs.open('override.txt', 'r', 'utf-8')
+ article_text = override.read()
+ override.close()
+
+ # Pass ?noexpand=1 in the url to disable template expansion.
+ if not self.params.get('noexpand', 0) \
+ and not self.params.get('edit', 0):
+ article_text = self.wikidb.expandArticle(article_text, title)
+
+ return article_text
+
+ def write_wiki_html(self, htmlout, title, article_text):
+ tokens = scanner.tokenize(article_text, title)
+
+ wiki_parsed = parser.Parser(tokens, title).parse()
+ wiki_parsed.caption = title
+
+ imagedb = WPImageDB(self.base_path + '/images/')
+ writer = WPHTMLWriter(self.wikidb.dataretriever, htmlout,
+ images=imagedb, lang=self.lang)
+ writer.write(wiki_parsed)
+ self.links_cache[title] = writer.links_list
+ return writer.math_processed
+
+ def send_article(self, title):
+ article_text = self.get_wikitext(title)
+
+ # Capitalize the first letter of the article -- Trac #6991.
+ title = title[0].capitalize() + title[1:]
+
+ # Replace underscores with spaces in title.
+ title = title.replace("_", " ")
+
+ # Redirect to Wikipedia if the article text is empty
+ # (e.g. an image link)
+ if article_text == "":
+ self.send_response(301)
+ self.send_header("Location",
+ 'http://' + self.lang + '.wikipedia.org/wiki/' +
+ title.encode('utf8'))
+ self.end_headers()
+ return
+
+ # Pass ?raw=1 in the URL to see the raw wikitext (post expansion,
+ # unless noexpand=1 is also set).
+ if self.params.get('raw', 0):
+ self.send_response(200)
+ self.send_header("Content-Type", "text/plain; charset=utf-8")
+ self.end_headers()
+
+ self.wfile.write(article_text.encode('utf8'))
+ elif self.params.get('edit', 0):
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html; charset=utf-8")
+ self.end_headers()
+
+ self.wfile.write('<html><body><form method="POST">')
+ self.wfile.write('<input type="submit" value="OK"><br />')
+ self.wfile.write('<textarea name="wmcontent" rows="40" cols="80">')
+ htmlout = HTMLOutputBuffer()
+ htmlout.write(article_text.encode('utf8'))
+ self.wfile.write(htmlout.getvalue())
+ self.wfile.write("</textarea></form></body></html>")
+ else:
+ htmlout = HTMLOutputBuffer()
+
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html; charset=utf-8")
+ self.end_headers()
+
+ htmlout.write('<html xmlns="http://www.w3.org/1999/xhtml"> ')
+
+ htmlout.write("<head>")
+ htmlout.write("<title>%s</title>" % title.encode('utf8'))
+
+ htmlout.write("<style type='text/css' media='screen, projection'>"
+ "@import '/static/common.css';"\
+ "@import '/static/monobook.css';"\
+ "@import '/static/styles.css';"\
+ "@import '/static/shared.css';"\
+ "</style>")
+
+ htmlout.write("</head>")
+
+ htmlout.write("<body>")
+
+ htmlout.write("<h1>")
+ htmlout.write(title)
+ htmlout.write(' <font size="1">&middot; <a class="offsite" ')
+ htmlout.write('href="http://' + self.lang + '.wikipedia.org/wiki/')
+ htmlout.write(title)
+ htmlout.write('">' + self.wpheader + '</a> ')
+
+ if self.reporturl:
+ # Report rendering problem.
+ htmlout.write('&middot; <a class="offsite" ')
+ htmlout.write('href="http://%s/render?q=' % self.reporturl)
+ htmlout.write(title)
+ htmlout.write('">Haz clic aquí si esta página contiene ' +
+ 'errores de presentación</a> ')
+
+ # Report inappropriate content.
+ htmlout.write(' &middot; <a class="offsite" ')
+ htmlout.write('href="http://%s/report?q=' % self.reporturl)
+ htmlout.write(title)
+ htmlout.write('">Esta página contiene material inapropiado' +
+ '</a>')
+
+ if self.editdir:
+ htmlout.write(' &middot; <a ')
+ htmlout.write('href="http://localhost:%s/wiki/' % self.port)
+ htmlout.write(title)
+ htmlout.write('?edit=true">[ Editar ]</a>')
+ htmlout.write(' &middot; <a ')
+ htmlout.write('href="http://localhost:%s/wiki/' % self.port)
+ htmlout.write(title)
+ htmlout.write('?edit=true">[ Vista OK ]</a>')
+ if self.giturl:
+ htmlout.write(' &middot; <a ')
+ htmlout.write('href="%s' % self.giturl)
+ htmlout.write(title)
+ htmlout.write('">[ Historial ]</a>')
+
+ htmlout.write("</font>")
+ htmlout.write('</h1>')
+
+ needs_math = self.write_wiki_html(htmlout, title, article_text)
+
+ if needs_math:
+                # MathJax config
+ htmlout.write('<script type="text/x-mathjax-config">')
+ htmlout.write(' MathJax.Hub.Config({')
+ htmlout.write(' extensions: [],')
+ htmlout.write(' jax: ["input/TeX","output/HTML-CSS"],')
+ htmlout.write(' "HTML-CSS": {')
+ htmlout.write(' availableFonts:[],')
+ htmlout.write(' styles: {".MathJax_Preview": ' +
+ '{visibility: "hidden"}}')
+ htmlout.write(' }')
+ htmlout.write(' });')
+ htmlout.write('</script>')
+
+ htmlout.write("<script type='text/javascript' " +
+ "src='http://localhost:8000/static/MathJax/MathJax.js'>" +
+ "</script>")
+
+ # validate links
+ self.write_process_links_js(htmlout, title)
+
+ htmlout.write('<center>' + self.wpfooter + '</center>')
+ htmlout.write("</body>")
+ htmlout.write("</html>")
+
+ html = htmlout.getvalue()
+
+ self.wfile.write(html)
+
+ def write_process_links_js(self, htmlout, title):
+ """
+        Write JavaScript that requests the array of external links via AJAX
+        and compares it with the links in the page; if a link is external,
+        its URL and className are rewritten.
+        """
+ htmlout.write("<script type='text/javascript'>\n")
+ htmlout.write(" xmlhttp=new XMLHttpRequest();\n")
+ htmlout.write(" xmlhttp.onreadystatechange=function() {\n")
+ htmlout.write(" if (xmlhttp.readyState==4 && " \
+ "xmlhttp.status==200) {\n")
+ htmlout.write(" external_links = eval(xmlhttp.responseText);\n")
+ htmlout.write(" for (var i = 0; i < document.links.length;" \
+ "i++) {\n")
+ htmlout.write(" link_url = document.links[i].href;\n")
+ htmlout.write(" last_bar = link_url.lastIndexOf('/');\n")
+ htmlout.write(" loc_article = link_url.substr(last_bar+1);\n")
+ htmlout.write(" external = false;\n")
+ htmlout.write(" for (var j = 0; j < external_links.length;" \
+ "j++) {\n")
+ htmlout.write(" external_link = external_links[j]\n")
+
+ htmlout.write(" if (loc_article == external_link) {\n")
+ htmlout.write(" external = true; break;}\n")
+ htmlout.write(" }\n")
+ htmlout.write(" if (external) {\n")
+ link_baseurl = 'http://' + self.lang + '.wikipedia.org/wiki/'
+ htmlout.write((" href = '%s'" % link_baseurl) + \
+ "+ external_links[j];\n")
+ htmlout.write(" document.links[i].href = href;\n")
+ htmlout.write(" document.links[i].className = 'offsite';\n")
+ htmlout.write(" }\n")
+ htmlout.write(" }\n")
+ htmlout.write(" }\n")
+ htmlout.write(" };\n")
+
+ val_links = "http://localhost:%s/links/%s" % (self.port, title)
+ htmlout.write(" xmlhttp.open('GET','%s',true);" % val_links)
+ htmlout.write(" xmlhttp.send();")
+ htmlout.write("</script>")
+
+ def send_links(self, title):
+ """
+        Send a JSON array of strings listing the article URLs that are not
+        available in the local database.
+ """
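+        # The response body is a bare JSON list of quoted titles, e.g.
+        # (hypothetical): ["Article_not_in_dump", "Caf%C3%A9"]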
+ links = self.links_cache[title]
+ # validate the links
+ external_links = []
+ articles_found = self.wikidb.dataretriever.check_existence_list(links)
+ for article in links:
+ if not dataretriever.normalize_title(article) in articles_found:
+ article = article.replace(" ", "_").encode('utf8')
+                # Needed so the format matches the URLs in the page when
+                # they are compared in JavaScript.
+ quoted = urllib.quote(article, safe='~@#$&()*!+=:;,.?/\'')
+ external_links.append(quoted)
+
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html; charset=utf-8")
+ self.end_headers()
+ self.wfile.write(simplejson.dumps(external_links))
+
+ def do_POST(self):
+
+ real_path = urllib.unquote(self.path)
+ real_path = unicode(real_path, 'utf8')
+
+ (real_path, sep, param_text) = real_path.partition('?')
+
+ # Wiki requests return article contents or redirect to Wikipedia.
+ m = re.match(r'^/wiki/(.+)$', real_path)
+ if self.editdir and m:
+ title = m.group(1)
+
+ self._save_page(title)
+
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html; charset=utf-8")
+ self.end_headers()
+
+ htmlout = HTMLOutputBuffer()
+ htmlout.write(title.encode('utf8'))
+
+ self.wfile.write('<html><body>Editado: ')
+ self.wfile.write('<a href="')
+
+ self.wfile.write(htmlout.getvalue())
+ self.wfile.write('">')
+        self.wfile.write(htmlout.getvalue())
+        self.wfile.write('</a></body></html>')
+
+ return
+
+ # Any other request redirects to the index page.
+ self.send_response(301)
+ self.send_header("Location", "/static/")
+ self.end_headers()
+
+ def _save_page(self, title):
+ formdata = cgi.FieldStorage(fp=self.rfile,
+ headers=self.headers, environ={'REQUEST_METHOD': 'POST'},
+ keep_blank_values=1)
+
+ user = formdata.getfirst('user')
+ comment = formdata.getfirst('comment')
+ wmcontent = formdata.getfirst('wmcontent')
+
+ # fix newlines
+ wmcontent = re.sub('\r', '', wmcontent)
+
+ fpath = self.getfpath('wiki', title)
+ # UGLY: racy.
+ if not os.path.exists(fpath):
+ self._saveorig(title)
+ (fh, tmpfpath) = tempfile.mkstemp(dir=os.path.dirname(fpath))
+ os.write(fh, wmcontent)
+ os.close(fh)
+ os.rename(tmpfpath, fpath)
+
+ return True
+
+ def getfpath(self, dir, title):
+ # may want to hash it
+ fpath = os.path.join(self.editdir, dir, title)
+ return fpath
+
+ def _saveorig(self, title):
+ article_text = self.wikidb.getRawArticle(title)
+ fpath = self.getfpath('wiki.orig', title)
+ fh = codecs.open(fpath, 'w', encoding='utf-8')
+ fh.write(article_text)
+ fh.close()
+
+ def get_editedarticle(self, title):
+ buf = None
+ fpath = self.getfpath('wiki', title)
+ if os.path.exists(fpath):
+ buf = codecs.open(fpath, 'r', encoding='utf-8').read()
+ return buf
+
+ def send_searchresult(self, title):
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html; charset=utf-8")
+ self.end_headers()
+
+ self.wfile.write("<html><head><title>"
+ + (self.resultstitle % title.encode('utf8'))
+ + "</title></head>")
+
+ self.wfile.write("<style type='text/css' media='screen, projection'>"\
+ "@import '/static/monobook.css';</style>")
+
+ self.wfile.write("</head>")
+
+ self.wfile.write("<body>")
+
+ self.wfile.write("<h1>" + (self.resultstitle % title.encode('utf8'))
+ + "</h1>")
+ self.wfile.write("<ul>")
+
+ articles = self.search(unicode(title))
+ for article in articles:
+ #if not result.startswith(self.templateprefix):
+ self.wfile.write('<li><a href="/wiki/%s">%s</a></li>' %
+ (article.encode('utf8'), article.encode('utf8')))
+
+ self.wfile.write("</ul>")
+
+ self.wfile.write("</body></html>")
+
+ def search(self, article_title):
+ return self.wikidb.dataretriever.search(article_title)
+
+ def send_image(self, path):
+ if os.path.exists(path.encode('utf8')[1:]):
+ # If image exists locally, serve it as normal.
+ SimpleHTTPRequestHandler.do_GET(self)
+ else:
+ # If not, redirect to wikimedia.
+ redirect_url = "http://upload.wikimedia.org/wikipedia/commons/%s" \
+ % path.encode('utf8')
+ self.send_response(301)
+ self.send_header("Location", redirect_url.encode('utf8'))
+ self.end_headers()
+
+ def handle_feedback(self, feedtype, article):
+ with codecs.open("feedback.log", "a", "utf-8") as f:
+ f.write(feedtype + "\t" + article + "\t" +
+ self.client_address[0] + "\n")
+ self.send_response(200)
+ self.send_header("Content-Type", "text/html; charset=utf-8")
+ self.end_headers()
+
+ if feedtype == "render":
+ strtype = "un error de presentación"
+ elif feedtype == "report":
+        strtype = "material inapropiado"
+
+ self.wfile.write("<html><title>Comentario recibido</title>" +
+            "Gracias por reportar %s en la página <b>%s</b>.</html>" %
+ (strtype, article.encode('utf8')))
+
+ def do_GET(self):
+ real_path = urllib.unquote(self.path)
+ real_path = unicode(real_path, 'utf8')
+
+ (real_path, sep, param_text) = real_path.partition('?')
+ self.params = {}
+ for p in param_text.split('&'):
+ (key, sep, value) = p.partition('=')
+ self.params[key] = value
+
+ # Wiki requests return article contents or redirect to Wikipedia.
+ m = re.match(r'^/wiki/(.+)$', real_path)
+ if m:
+ self.send_article(m.group(1))
+ return
+
+ # Search requests return search results.
+ m = re.match(r'^/search$', real_path)
+ if m:
+ self.send_searchresult(self.params.get('q', ''))
+ return
+
+ # Image requests are handled locally or are referenced from Wikipedia.
+ # matches /es_PE/images/, /en_US/images/ etc
+ m = re.match(r'^/\w*/images/(.+)$', real_path)
+ if m:
+ self.send_image(real_path)
+ return
+
+ # Static requests handed off to SimpleHTTPServer.
+ m = re.match(r'^/(static|generated)/(.*)$', real_path)
+ if m:
+ SimpleHTTPRequestHandler.do_GET(self)
+ return
+
+ # Handle link validation requests
+ m = re.match(r'^/links/(.*)$', real_path)
+ if m:
+ self.send_links(m.group(1))
+ return
+
+ # Feedback links.
+ m = re.match(r'^/(report|render)$', real_path)
+ if m:
+ self.handle_feedback(m.group(1), self.params.get('q', ''))
+ return
+
+ # Any other request redirects to the index page.
+ self.send_response(301)
+ self.send_header("Location", "/static/")
+ self.end_headers()
+
+
+def run_server(confvars):
+
+ if 'editdir' in confvars:
+ try:
+ for dir in ['wiki', 'wiki.orig']:
+ fdirpath = os.path.join(confvars['editdir'], dir)
+ if not os.path.exists(fdirpath):
+ os.mkdir(fdirpath)
+        except:
+            logging.error("Error setting up directories: %s must be a "
+                          "writable directory" % confvars['editdir'])
+
+ blacklistpath = os.path.join(os.path.dirname(confvars['path']),
+ 'template_blacklist')
+ logging.debug("Reading template_blacklist %s" % blacklistpath)
+ blacklist = set()
+ if os.path.exists(blacklistpath):
+ with open(blacklistpath, 'r') as f:
+ for line in f.readlines():
+ blacklist.add(line.rstrip().decode('utf8'))
+ logging.debug("Read %d blacklisted templates" % len(blacklist))
+
+ confvars['templateblacklist'] = blacklist
+ confvars['lang'] = confvars['path'][0:2]
+ confvars['flang'] = os.path.basename(confvars['path'])[0:5]
+
+ wikidb = WPWikiDB(confvars['path'], confvars['lang'],
+ confvars['templateprefix'], confvars['templateblacklist'])
+
+ links_cache = pylru.lrucache(10)
+
+ httpd = MyHTTPServer(('', confvars['port']),
+ lambda *args: WikiRequestHandler(wikidb, confvars, links_cache, *args))
+
+ if confvars['comandline']:
+ httpd.serve_forever()
+ else:
+ from threading import Thread
+ server = Thread(target=httpd.serve_forever)
+ server.setDaemon(True)
+ logging.debug("Before start server")
+ server.start()
+ logging.debug("After start server")
+
+    # Tell the world that we're ready to accept requests.
+ logging.debug('Ready')
+
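+# run_server() expects a confvars dict assembled by the per-language
+# activity script.  A hypothetical, minimal example (key names taken from
+# the lookups above; all values are illustrative only):
+#
+#     run_server({'path': 'es_PE.xml.processed',  # lang derived from [0:2]
+#                 'port': 8000,
+#                 'templateprefix': 'Plantilla:',
+#                 'wpheader': 'Ver en Wikipedia',
+#                 'wpfooter': 'Contenido disponible bajo GFDL',
+#                 'resultstitle': 'Resultados para "%s"',
+#                 'comandline': True})            # serve in the foreground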
+
+if __name__ == '__main__':
+
+    logging.error("Run the start script for your language's Wikipedia")
+    logging.error("e.g.: activity_es.py")