diff options
Diffstat (limited to 'server.py')
-rwxr-xr-x | server.py | 993 |
1 files changed, 993 insertions, 0 deletions
diff --git a/server.py b/server.py new file mode 100755 index 0000000..14c0827 --- /dev/null +++ b/server.py @@ -0,0 +1,993 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2007, One Laptop Per Child +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# +# Web server script for Wikiserver project. +# +# Usage: server.py <dbfile> <port> +# +## Standard libs +from __future__ import with_statement +import logging +import sys +import os +import platform +import select +import codecs +import BaseHTTPServer +from SimpleHTTPServer import SimpleHTTPRequestHandler +import SocketServer +import socket + +import cgi +import errno +import urllib +import tempfile +import re +try: + from hashlib import md5 +except ImportError: + from md5 import md5 + +import dataretriever +import pylru +import simplejson + +## +## Libs we ship -- add lib path for +## shared objects +## +_root_path = os.path.dirname(__file__) +# linux32_27" for Linux 32bits Python 2.7 +system_id = "%s%s" % (platform.system().lower(), + platform.architecture()[0][0:2]) +if platform.processor().startswith('arm'): + system_id = platform.processor() + +platform_dir = "%s_%s%s" % (system_id, + sys.version_info[0], # major + sys.version_info[1]) # minor + +sys.path.append(os.path.join(_root_path, 'binarylibs', platform_dir)) + +import mwlib.htmlwriter +from mwlib import parser, scanner, expander + +# Uncomment to print out a large dump from the template expander. +#os.environ['DEBUG_EXPANDER'] = '1' + + +class MyHTTPServer(BaseHTTPServer.HTTPServer): + def serve_forever(self, poll_interval=0.5): + """Overridden version of BaseServer.serve_forever that does not fail + to work when EINTR is received. + """ + self._BaseServer__serving = True + self._BaseServer__is_shut_down.clear() + while self._BaseServer__serving: + + # XXX: Consider using another file descriptor or + # connecting to the socket to wake this up instead of + # polling. Polling reduces our responsiveness to a + # shutdown request and wastes cpu at all other times. + try: + r, w, e = select.select([self], [], [], poll_interval) + except select.error, e: + if e[0] == errno.EINTR: + logging.debug("got eintr") + continue + raise + if r: + self._handle_request_noblock() + self._BaseServer__is_shut_down.set() + + def server_bind(self): + """Override server_bind in HTTPServer to not use + getfqdn to get the server name because is very slow.""" + SocketServer.TCPServer.server_bind(self) + host, port = self.socket.getsockname()[:2] + self.server_name = 'localhost' + self.server_port = port + + +class WPWikiDB: + """Retrieves article contents for mwlib.""" + + def __init__(self, path, lang, templateprefix, templateblacklist): + self.lang = lang + self.templateprefix = templateprefix + self.templateblacklist = templateblacklist + self.dataretriever = dataretriever.DataRetriever(system_id, path) + self.templates_cache = {'!' : '|', u'!': '|'} # a special case + + def getRawArticle(self, title, followRedirects=True): + + # Retrieve article text, recursively following #redirects. + if title == '': + return '' + + article_text = \ + self.dataretriever.get_text_article(title).decode('utf-8') + + # Stripping leading & trailing whitespace fixes template expansion. + article_text = article_text.lstrip() + article_text = article_text.rstrip() + + return article_text + + def getTemplate(self, title, followRedirects=False): + if title in self.templates_cache: + return self.templates_cache[title] + else: + try: + template_content = self.getRawArticle(title) + # check recursion in templates + template_name = title[title.find(':') + 1:] + + # Remove <noinclude> because expandtemplates doesn't detect it + # and follow recursions + lower_content = template_content.lower() + start_noinclude = lower_content.find('<noinclude>') + while start_noinclude > -1: + end_noinclude = lower_content.find('</noinclude>') + content = template_content[:start_noinclude] + if end_noinclude > -1: + content = content + template_content[end_noinclude + \ + len('</noinclude>'):] + template_content = content + lower_content = template_content.lower() + start_noinclude = lower_content.find('<noinclude>') + + if re.search('{{' + template_name, template_content, \ + re.IGNORECASE) is not None: + logging.error("Found recursion template %s" % title) + template_content = re.sub(template_name, '_not_found_', + template_content, re.IGNORECASE) + + # Search again + if re.search('{{' + template_name, template_content, \ + re.IGNORECASE) is not None: + template_content = '' + + except: + template_content = '' + + self.templates_cache[title] = template_content + return template_content + + def expandArticle(self, article_text, title): + template_expander = expander.Expander(article_text, pagename=title, + wikidb=self, lang=self.lang, + templateprefix=self.templateprefix, + templateblacklist=self.templateblacklist) + expanded_article = template_expander.expandTemplates() + + return expanded_article + + def getExpandedArticle(self, title): + return self.expandArticle(self.getRawArticle(title), title) + + +class WPImageDB: + """Retrieves images for mwlib.""" + def __init__(self, basepath): + self.basepath = basepath + + def hashpath(self, name): + name = name.replace(' ', '_') + name = name[:1].upper() + name[1:] + d = md5(name.encode('utf-8')).hexdigest() + return "/".join([d[0], d[:2], name]) + + def hashpath_dir(self, name): + name = name.replace(' ', '_') + name = name[:1].upper() + name[1:] + d = md5(name.encode('utf-8')).hexdigest() + return "/".join([d[0], d[:2]]) + + def getPath(self, name, size=None): + hashed_name = self.hashpath(name).encode('utf8') + path = self.basepath + '/%s' % hashed_name + return path + + def getURL(self, name, size=None): + hashed_name = self.hashpath(name).encode('utf8') + if size is not None: + file_name = self.basepath + self.hashpath_dir(name) + '/' + \ + ('%dpx-' % size) + name.replace(' ', '_') + else: + file_name = self.basepath + self.hashpath_dir(name) + '/' + \ + name.replace(' ', '_') + + if os.path.exists(file_name): + url = '/' + file_name + else: + if size is None: + url = 'http://upload.wikimedia.org/wikipedia/commons/' + \ + hashed_name + else: + url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/' \ + + hashed_name + ('/%dpx-' % size) + name.replace(' ', '_') + if re.match(r'.*\.svg$', url, re.IGNORECASE): + url = url + '.png' + + #print "getUrl: %s -> %s" % (name.encode('utf8'), url.encode('utf8')) + return url + + +class HTMLOutputBuffer: + """Buffers output and converts to utf8 as needed.""" + + def __init__(self): + self.buffer = '' + + def write(self, obj): + if isinstance(obj, unicode): + self.buffer += obj.encode('utf8') + else: + self.buffer += obj + + def getvalue(self): + return self.buffer + + +class WPMathRenderer: + + def __init__(self, html_writer): + self.writer = html_writer + + def render(self, latex): + logging.debug("MathRenderer %s" % latex) + latex = latex.replace('\f', '\\f') + latex = latex.replace('\t', '\\t') + # \bold gives a error + latex = latex.replace('\\bold', '') + + # postpone the process to do it with javascript at client side + mathml = '<script type="math/tex">' + latex + '</script>' + self.writer.math_processed = True + return mathml + + +class WPHTMLWriter(mwlib.htmlwriter.HTMLWriter): + """Customizes HTML output from mwlib.""" + + def __init__(self, dataretriever, wfile, images=None, lang='en'): + self.dataretriever = dataretriever + self.gallerylevel = 0 + self.lang = lang + self.math_processed = False + self.links_list = [] + + math_renderer = WPMathRenderer(self) + mwlib.htmlwriter.HTMLWriter.__init__(self, wfile, images, + math_renderer=math_renderer) + + def writeLink(self, obj): + if obj.target is None: + return + + article = obj.target + #print "writeLink", article, obj.caption + if article.startswith('#'): + #print "----> <a href='%s'>" % article + self.out.write("<a href='%s'>" % article) + else: + + # Parser appending '/' characters to link targets for some reason. + article = article.rstrip('/') + + title = article + title = title[0].capitalize() + title[1:] + title = title.replace("_", " ") + self.links_list.append(article) + + parts = article.encode('utf-8').split('#') + parts[0] = parts[0].replace(" ", "_") + url = ("#".join([x for x in parts])) + + self.out.write("<a href='/wiki/%s'>" % url) + + if obj.children: + for x in obj.children: + self.write(x) + else: + self._write(obj.target) + + self.out.write("</a>") + + def writeImageLink(self, obj): + if self.images is None: + return + + width = obj.width + height = obj.height + + is_svg = re.match(r'.*\.svg$', obj.target, re.IGNORECASE) + is_thumb = obj.thumb or obj.frame or (self.gallerylevel > 0) + + if (width or height) or is_thumb: + max_length = max(width, height) + if obj.thumb: + max_length = 180 + if self.gallerylevel > 0: + max_length = 120 + path = self.images.getPath(obj.target, size=max_length) + url_thumb = self.images.getURL(obj.target, size=max_length) + url = self.images.getURL(obj.target) + else: + path = self.images.getPath(obj.target) + url_thumb = self.images.getURL(obj.target) + url = url_thumb + + if url_thumb is None: + return + + # The following HTML generation code is copied closely from InstaView, + # which seems to approximate the nest of <div> tags needed to render + # images close to right. + # It's also been extended to support Gallery tags. + if self.imglevel == 0: + self.imglevel += 1 + + align = obj.align + thumb = obj.thumb + frame = obj.frame + caption = obj.caption + + # SVG images must be included using <object data=''> rather than + # <img src=''>. + if re.match(r'.*\.svg$', url_thumb, re.IGNORECASE): + tag = 'object' + ref = 'data' + else: + tag = 'img' + ref = 'src' + + # Hack to get galleries to look okay, in the absence of image + # dimensions. + if self.gallerylevel > 0: + width = 120 + + if thumb and not width: + width = 180 # FIXME: This should not be hardcoded + + attr = '' + if width: + attr += 'width="%d" ' % width + + img = '<%(tag)s %(ref)s="%(url)s" longdesc="%(cap)s" %(att)s>' % \ + {'tag': tag, 'ref': ref, 'url': url_thumb, 'cap': caption, + 'att': attr} + '</%(tag)s>' % {'tag': tag} + + center = False + if align == 'center': + center = True + align = None + + if center: + self.out.write('<div class="center">') + + if self.gallerylevel > 0: + self.out.write('<div class="gallerybox" ' + + 'style="width: 155px;">') + + self.out.write('<div class="thumb" ' + + 'style="padding: 13px 0; width: 150px;">') + self.out.write('<div style="margin-left: auto; ' + + 'margin-right: auto; width: 120px;">') + self.out.write('<a href="%s" class="image" title="%s">' % + (url, caption)) + self.out.write(img) + self.out.write('</a>') + self.out.write('</div>') + self.out.write('</div>') + + self.out.write('<div class="gallerytext">') + self.out.write('<p>') + for x in obj.children: + self.write(x) + self.out.write('</p>') + self.out.write('</div>') + + self.out.write('</div>') + elif frame or thumb: + if not align: + align = "right" + self.out.write('<div class="thumb t%s">' % align) + + if not width: + width = 180 # default thumb width + self.out.write('<div style="width:%dpx;">' % (int(width) + 2)) + + if thumb: + self.out.write(img) + self.out.write('<div class="thumbcaption">') + self.out.write('<div class="magnify" style="float:right">') + self.out.write('<a href="%s" class="internal" ' % url + + 'title="Enlarge">') + self.out.write('<img src="/static/magnify-clip.png">' + + '</img>') + self.out.write('</a>') + self.out.write('</div>') + for x in obj.children: + self.write(x) + self.out.write('</div>') + else: + self.out.write(img) + self.out.write('<div class="thumbcaption">') + for x in obj.children: + self.write(x) + self.out.write('</div>') + + self.out.write('</div>') + self.out.write('</div>') + elif align: + self.out.write('<div class="float%s">' % align) + self.out.write(img) + self.out.write('</div>') + else: + self.out.write(img) + + if center: + self.out.write('</div>') + + self.imglevel -= 1 + else: + self.out.write('<a href="%s">' % url.encode('utf8')) + + for x in obj.children: + self.write(x) + + self.out.write('</a>') + + def writeTagNode(self, t): + if t.caption == 'gallery': + self.out.write('<table class="gallery" cellspacing="0" ' + + 'cellpadding="0">') + + self.gallerylevel += 1 + + # TODO: More than one row. + self.out.write('<tr>') + + for x in t.children: + self.out.write('<td>') + self.write(x) + self.out.write('</td>') + + self.out.write('</tr>') + + self.gallerylevel -= 1 + + self.out.write('</table>') + else: + # All others handled by base class. + mwlib.htmlwriter.HTMLWriter.writeTagNode(self, t) + + +class WikiRequestHandler(SimpleHTTPRequestHandler): + def __init__(self, wikidb, conf, links_cache, request, client_address, + server): + # pullcord is currently offline + # self.reporturl = 'pullcord.laptop.org:8000' + self.reporturl = False + self.port = conf['port'] + self.lang = conf['lang'] + self.templateprefix = conf['templateprefix'] + self.templateblacklist = set(conf['templateblacklist']) + self.wpheader = conf['wpheader'] + self.wpfooter = conf['wpfooter'] + self.resultstitle = conf['resultstitle'] + self.base_path = os.path.dirname(conf['path']) + self.links_cache = links_cache + + if 'editdir' in conf: + self.editdir = conf['editdir'] + else: + self.editdir = False + if 'giturl' in conf: + self.giturl = conf['giturl'] + else: + self.giturl = False + + self.wikidb = wikidb + + self.client_address = client_address + + SimpleHTTPRequestHandler.__init__( + self, request, client_address, server) + + def get_wikitext(self, title): + article_text = self.wikidb.getRawArticle(title) + #print article_text + if self.editdir: + edited = self.get_editedarticle(title) + if edited: + article_text = edited + + # Pass ?override=1 in the url to replace wikitext for testing + # the renderer. + if self.params.get('override', 0): + override = codecs.open('override.txt', 'r', 'utf-8') + article_text = override.read() + override.close() + + # Pass ?noexpand=1 in the url to disable template expansion. + if not self.params.get('noexpand', 0) \ + and not self.params.get('edit', 0): + article_text = self.wikidb.expandArticle(article_text, title) + + return article_text + + def write_wiki_html(self, htmlout, title, article_text): + tokens = scanner.tokenize(article_text, title) + + wiki_parsed = parser.Parser(tokens, title).parse() + wiki_parsed.caption = title + + imagedb = WPImageDB(self.base_path + '/images/') + writer = WPHTMLWriter(self.wikidb.dataretriever, htmlout, + images=imagedb, lang=self.lang) + writer.write(wiki_parsed) + self.links_cache[title] = writer.links_list + return writer.math_processed + + def send_article(self, title): + article_text = self.get_wikitext(title) + + # Capitalize the first letter of the article -- Trac #6991. + title = title[0].capitalize() + title[1:] + + # Replace underscores with spaces in title. + title = title.replace("_", " ") + + # Redirect to Wikipedia if the article text is empty + # (e.g. an image link) + if article_text == "": + self.send_response(301) + self.send_header("Location", + 'http://' + self.lang + '.wikipedia.org/wiki/' + + title.encode('utf8')) + self.end_headers() + return + + # Pass ?raw=1 in the URL to see the raw wikitext (post expansion, + # unless noexpand=1 is also set). + if self.params.get('raw', 0): + self.send_response(200) + self.send_header("Content-Type", "text/plain; charset=utf-8") + self.end_headers() + + self.wfile.write(article_text.encode('utf8')) + elif self.params.get('edit', 0): + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + + self.wfile.write('<html><body><form method="POST">') + self.wfile.write('<input type="submit" value="OK"><br />') + self.wfile.write('<textarea name="wmcontent" rows="40" cols="80">') + htmlout = HTMLOutputBuffer() + htmlout.write(article_text.encode('utf8')) + self.wfile.write(htmlout.getvalue()) + self.wfile.write("</textarea></form></body></html>") + else: + htmlout = HTMLOutputBuffer() + + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + + htmlout.write('<html xmlns="http://www.w3.org/1999/xhtml"> ') + + htmlout.write("<head>") + htmlout.write("<title>%s</title>" % title.encode('utf8')) + + htmlout.write("<style type='text/css' media='screen, projection'>" + "@import '/static/common.css';"\ + "@import '/static/monobook.css';"\ + "@import '/static/styles.css';"\ + "@import '/static/shared.css';"\ + "</style>") + + htmlout.write("</head>") + + htmlout.write("<body>") + + htmlout.write("<h1>") + htmlout.write(title) + htmlout.write(' <font size="1">· <a class="offsite" ') + htmlout.write('href="http://' + self.lang + '.wikipedia.org/wiki/') + htmlout.write(title) + htmlout.write('">' + self.wpheader + '</a> ') + + if self.reporturl: + # Report rendering problem. + htmlout.write('· <a class="offsite" ') + htmlout.write('href="http://%s/render?q=' % self.reporturl) + htmlout.write(title) + htmlout.write('">Haz clic aquí si esta página contiene ' + + 'errores de presentación</a> ') + + # Report inappropriate content. + htmlout.write(' · <a class="offsite" ') + htmlout.write('href="http://%s/report?q=' % self.reporturl) + htmlout.write(title) + htmlout.write('">Esta página contiene material inapropiado' + + '</a>') + + if self.editdir: + htmlout.write(' · <a ') + htmlout.write('href="http://localhost:%s/wiki/' % self.port) + htmlout.write(title) + htmlout.write('?edit=true">[ Editar ]</a>') + htmlout.write(' · <a ') + htmlout.write('href="http://localhost:%s/wiki/' % self.port) + htmlout.write(title) + htmlout.write('?edit=true">[ Vista OK ]</a>') + if self.giturl: + htmlout.write(' · <a ') + htmlout.write('href="%s' % self.giturl) + htmlout.write(title) + htmlout.write('">[ Historial ]</a>') + + htmlout.write("</font>") + htmlout.write('</h1>') + + needs_math = self.write_wiki_html(htmlout, title, article_text) + + if needs_math: + # MathJs config + htmlout.write('<script type="text/x-mathjax-config">') + htmlout.write(' MathJax.Hub.Config({') + htmlout.write(' extensions: [],') + htmlout.write(' jax: ["input/TeX","output/HTML-CSS"],') + htmlout.write(' "HTML-CSS": {') + htmlout.write(' availableFonts:[],') + htmlout.write(' styles: {".MathJax_Preview": ' + + '{visibility: "hidden"}}') + htmlout.write(' }') + htmlout.write(' });') + htmlout.write('</script>') + + htmlout.write("<script type='text/javascript' " + + "src='http://localhost:8000/static/MathJax/MathJax.js'>" + + "</script>") + + # validate links + self.write_process_links_js(htmlout, title) + + htmlout.write('<center>' + self.wpfooter + '</center>') + htmlout.write("</body>") + htmlout.write("</html>") + + html = htmlout.getvalue() + + self.wfile.write(html) + + def write_process_links_js(self, htmlout, title): + """ + write javascript to request a array of external links using ajax + and compare with the links in the page, if one link is external + change the url and the className + """ + htmlout.write("<script type='text/javascript'>\n") + htmlout.write(" xmlhttp=new XMLHttpRequest();\n") + htmlout.write(" xmlhttp.onreadystatechange=function() {\n") + htmlout.write(" if (xmlhttp.readyState==4 && " \ + "xmlhttp.status==200) {\n") + htmlout.write(" external_links = eval(xmlhttp.responseText);\n") + htmlout.write(" for (var i = 0; i < document.links.length;" \ + "i++) {\n") + htmlout.write(" link_url = document.links[i].href;\n") + htmlout.write(" last_bar = link_url.lastIndexOf('/');\n") + htmlout.write(" loc_article = link_url.substr(last_bar+1);\n") + htmlout.write(" external = false;\n") + htmlout.write(" for (var j = 0; j < external_links.length;" \ + "j++) {\n") + htmlout.write(" external_link = external_links[j]\n") + + htmlout.write(" if (loc_article == external_link) {\n") + htmlout.write(" external = true; break;}\n") + htmlout.write(" }\n") + htmlout.write(" if (external) {\n") + link_baseurl = 'http://' + self.lang + '.wikipedia.org/wiki/' + htmlout.write((" href = '%s'" % link_baseurl) + \ + "+ external_links[j];\n") + htmlout.write(" document.links[i].href = href;\n") + htmlout.write(" document.links[i].className = 'offsite';\n") + htmlout.write(" }\n") + htmlout.write(" }\n") + htmlout.write(" }\n") + htmlout.write(" };\n") + + val_links = "http://localhost:%s/links/%s" % (self.port, title) + htmlout.write(" xmlhttp.open('GET','%s',true);" % val_links) + htmlout.write(" xmlhttp.send();") + htmlout.write("</script>") + + def send_links(self, title): + """ + send a json array of string with the list of url not availables + in the local database + """ + links = self.links_cache[title] + # validate the links + external_links = [] + articles_found = self.wikidb.dataretriever.check_existence_list(links) + for article in links: + if not dataretriever.normalize_title(article) in articles_found: + article = article.replace(" ", "_").encode('utf8') + # needed to have the same format than url in the page + # when is compared in javascript + quoted = urllib.quote(article, safe='~@#$&()*!+=:;,.?/\'') + external_links.append(quoted) + + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + self.wfile.write(simplejson.dumps(external_links)) + + def do_POST(self): + + real_path = urllib.unquote(self.path) + real_path = unicode(real_path, 'utf8') + + (real_path, sep, param_text) = real_path.partition('?') + + # Wiki requests return article contents or redirect to Wikipedia. + m = re.match(r'^/wiki/(.+)$', real_path) + if self.editdir and m: + title = m.group(1) + + self._save_page(title) + + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + + htmlout = HTMLOutputBuffer() + htmlout.write(title.encode('utf8')) + + self.wfile.write('<html><body>Editado: ') + self.wfile.write('<a href="') + + self.wfile.write(htmlout.getvalue()) + self.wfile.write('">') + self.wfile.write(htmlout.getvalue()) + self.wfile.write('</body></html>') + + return + + # Any other request redirects to the index page. + self.send_response(301) + self.send_header("Location", "/static/") + self.end_headers() + + def _save_page(self, title): + formdata = cgi.FieldStorage(fp=self.rfile, + headers=self.headers, environ={'REQUEST_METHOD': 'POST'}, + keep_blank_values=1) + + user = formdata.getfirst('user') + comment = formdata.getfirst('comment') + wmcontent = formdata.getfirst('wmcontent') + + # fix newlines + wmcontent = re.sub('\r', '', wmcontent) + + fpath = self.getfpath('wiki', title) + # UGLY: racy. + if not os.path.exists(fpath): + self._saveorig(title) + (fh, tmpfpath) = tempfile.mkstemp(dir=os.path.dirname(fpath)) + os.write(fh, wmcontent) + os.close(fh) + os.rename(tmpfpath, fpath) + + return True + + def getfpath(self, dir, title): + # may want to hash it + fpath = os.path.join(self.editdir, dir, title) + return fpath + + def _saveorig(self, title): + article_text = self.wikidb.getRawArticle(title) + fpath = self.getfpath('wiki.orig', title) + fh = codecs.open(fpath, 'w', encoding='utf-8') + fh.write(article_text) + fh.close() + + def get_editedarticle(self, title): + buf = None + fpath = self.getfpath('wiki', title) + if os.path.exists(fpath): + buf = codecs.open(fpath, 'r', encoding='utf-8').read() + return buf + + def send_searchresult(self, title): + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + + self.wfile.write("<html><head><title>" + + (self.resultstitle % title.encode('utf8')) + + "</title></head>") + + self.wfile.write("<style type='text/css' media='screen, projection'>"\ + "@import '/static/monobook.css';</style>") + + self.wfile.write("</head>") + + self.wfile.write("<body>") + + self.wfile.write("<h1>" + (self.resultstitle % title.encode('utf8')) + + "</h1>") + self.wfile.write("<ul>") + + articles = self.search(unicode(title)) + for article in articles: + #if not result.startswith(self.templateprefix): + self.wfile.write('<li><a href="/wiki/%s">%s</a></li>' % + (article.encode('utf8'), article.encode('utf8'))) + + self.wfile.write("</ul>") + + self.wfile.write("</body></html>") + + def search(self, article_title): + return self.wikidb.dataretriever.search(article_title) + + def send_image(self, path): + if os.path.exists(path.encode('utf8')[1:]): + # If image exists locally, serve it as normal. + SimpleHTTPRequestHandler.do_GET(self) + else: + # If not, redirect to wikimedia. + redirect_url = "http://upload.wikimedia.org/wikipedia/commons/%s" \ + % path.encode('utf8') + self.send_response(301) + self.send_header("Location", redirect_url.encode('utf8')) + self.end_headers() + + def handle_feedback(self, feedtype, article): + with codecs.open("feedback.log", "a", "utf-8") as f: + f.write(feedtype + "\t" + article + "\t" + + self.client_address[0] + "\n") + f.close() + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.end_headers() + + if feedtype == "render": + strtype = "un error de presentación" + elif feedtype == "report": + strtype = "material inapropriado" + + self.wfile.write("<html><title>Comentario recibido</title>" + + "Gracias por reportar %s en la pagina <b>%s</b>.</html>" % + (strtype, article.encode('utf8'))) + + def do_GET(self): + real_path = urllib.unquote(self.path) + real_path = unicode(real_path, 'utf8') + + (real_path, sep, param_text) = real_path.partition('?') + self.params = {} + for p in param_text.split('&'): + (key, sep, value) = p.partition('=') + self.params[key] = value + + # Wiki requests return article contents or redirect to Wikipedia. + m = re.match(r'^/wiki/(.+)$', real_path) + if m: + self.send_article(m.group(1)) + return + + # Search requests return search results. + m = re.match(r'^/search$', real_path) + if m: + self.send_searchresult(self.params.get('q', '')) + return + + # Image requests are handled locally or are referenced from Wikipedia. + # matches /es_PE/images/, /en_US/images/ etc + m = re.match(r'^/\w*/images/(.+)$', real_path) + if m: + self.send_image(real_path) + return + + # Static requests handed off to SimpleHTTPServer. + m = re.match(r'^/(static|generated)/(.*)$', real_path) + if m: + SimpleHTTPRequestHandler.do_GET(self) + return + + # Handle link validation requests + m = re.match(r'^/links/(.*)$', real_path) + if m: + self.send_links(m.group(1)) + return + + # Feedback links. + m = re.match(r'^/(report|render)$', real_path) + if m: + self.handle_feedback(m.group(1), self.params.get('q', '')) + return + + # Any other request redirects to the index page. + self.send_response(301) + self.send_header("Location", "/static/") + self.end_headers() + + +def run_server(confvars): + + if 'editdir' in confvars: + try: + for dir in ['wiki', 'wiki.orig']: + fdirpath = os.path.join(confvars['editdir'], dir) + if not os.path.exists(fdirpath): + os.mkdir(fdirpath) + except: + logging.error("Error setting up directories:") + logging.debug("%s must be a writable directory" % + confvars['editdir']) + + blacklistpath = os.path.join(os.path.dirname(confvars['path']), + 'template_blacklist') + logging.debug("Reading template_blacklist %s" % blacklistpath) + blacklist = set() + if os.path.exists(blacklistpath): + with open(blacklistpath, 'r') as f: + for line in f.readlines(): + blacklist.add(line.rstrip().decode('utf8')) + logging.debug("Read %d blacklisted templates" % len(blacklist)) + + confvars['templateblacklist'] = blacklist + confvars['lang'] = confvars['path'][0:2] + confvars['flang'] = os.path.basename(confvars['path'])[0:5] + + wikidb = WPWikiDB(confvars['path'], confvars['lang'], + confvars['templateprefix'], confvars['templateblacklist']) + + links_cache = pylru.lrucache(10) + + httpd = MyHTTPServer(('', confvars['port']), + lambda *args: WikiRequestHandler(wikidb, confvars, links_cache, *args)) + + if confvars['comandline']: + httpd.serve_forever() + else: + from threading import Thread + server = Thread(target=httpd.serve_forever) + server.setDaemon(True) + logging.debug("Before start server") + server.start() + logging.debug("After start server") + + # Tell the world that we're ready to accept request. + logging.debug('Ready') + + +if __name__ == '__main__': + + logging.error("Execute the starting class for your language wikipedia") + logging.error("Ex: activity_es.py") |