#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007, One Laptop Per Child
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
# Web server script for the Wikiserver project.
#
# Usage: server.py
#

## Standard libs
from __future__ import with_statement
import logging
import sys
import os
import platform
import select
import codecs
import BaseHTTPServer
from SimpleHTTPServer import SimpleHTTPRequestHandler
import SocketServer
import socket
import cgi
import errno
import urllib
import tempfile
import re
try:
    from hashlib import md5
except ImportError:
    from md5 import md5

import dataretriever
import pylru
import simplejson

##
## Libs we ship -- add lib path for
## shared objects
##
_root_path = os.path.dirname(__file__)

# e.g. "linux32_27" for Linux 32 bits running Python 2.7
system_id = "%s%s" % (platform.system().lower(),
                      platform.architecture()[0][0:2])
if platform.processor().startswith('arm'):
    system_id = platform.processor()
platform_dir = "%s_%s%s" % (system_id,
                            sys.version_info[0],   # major
                            sys.version_info[1])   # minor
sys.path.append(os.path.join(_root_path, 'binarylibs', platform_dir))

import mwlib.htmlwriter
from mwlib import parser, scanner, expander

# Uncomment to print out a large dump from the template expander.
#os.environ['DEBUG_EXPANDER'] = '1'


class MyHTTPServer(BaseHTTPServer.HTTPServer):

    def serve_forever(self, poll_interval=0.5):
        """Overridden version of BaseServer.serve_forever that does not
        fail when EINTR is received.
        """
        self._BaseServer__serving = True
        self._BaseServer__is_shut_down.clear()
        while self._BaseServer__serving:
            # XXX: Consider using another file descriptor or
            # connecting to the socket to wake this up instead of
            # polling. Polling reduces our responsiveness to a
            # shutdown request and wastes cpu at all other times.
            try:
                r, w, e = select.select([self], [], [], poll_interval)
            except select.error, e:
                if e[0] == errno.EINTR:
                    logging.debug("got eintr")
                    continue
                raise
            if r:
                self._handle_request_noblock()
        self._BaseServer__is_shut_down.set()

    def server_bind(self):
        """Override server_bind in HTTPServer so that getfqdn is not used
        to get the server name, because it is very slow."""
        SocketServer.TCPServer.server_bind(self)
        host, port = self.socket.getsockname()[:2]
        self.server_name = 'localhost'
        self.server_port = port


class WPWikiDB:
    """Retrieves article contents for mwlib."""

    def __init__(self, path, lang, templateprefix, templateblacklist):
        self.lang = lang
        self.templateprefix = templateprefix
        self.templateblacklist = templateblacklist
        self.dataretriever = dataretriever.DataRetriever(system_id, path)
        self.templates_cache = {'!': '|', u'!': '|'}  # a special case

    def getRawArticle(self, title, followRedirects=True):
        # Retrieve article text, recursively following #redirects.
        if title == '':
            return ''
        article_text = \
            self.dataretriever.get_text_article(title).decode('utf-8')
        # Stripping leading & trailing whitespace fixes template expansion.
        article_text = article_text.strip()
        return article_text

    def getTemplate(self, title, followRedirects=False):
        if title in self.templates_cache:
            return self.templates_cache[title]

        try:
            template_content = self.getRawArticle(title)
            # Check for recursion in templates.
            template_name = title[title.find(':') + 1:]
            # Remove <noinclude> sections, because expandTemplates does
            # not detect them and would follow recursions.
            lower_content = template_content.lower()
            start_noinclude = lower_content.find('<noinclude>')
            while start_noinclude > -1:
                end_noinclude = lower_content.find('</noinclude>')
                content = template_content[:start_noinclude]
                if end_noinclude > -1:
                    content = content + template_content[end_noinclude +
                            len('</noinclude>'):]
                template_content = content
                lower_content = template_content.lower()
                start_noinclude = lower_content.find('<noinclude>')

            if re.search('{{' + template_name, template_content,
                         re.IGNORECASE) is not None:
                logging.error("Found recursion in template %s" % title)
                # The flags must be passed by keyword: as a fourth
                # positional argument re.sub would read them as the
                # replacement count.
                template_content = re.sub(template_name, '_not_found_',
                                          template_content,
                                          flags=re.IGNORECASE)
                # Search again.
                if re.search('{{' + template_name, template_content,
                             re.IGNORECASE) is not None:
                    template_content = ''
        except:
            template_content = ''
        self.templates_cache[title] = template_content
        return template_content

    def expandArticle(self, article_text, title):
        template_expander = expander.Expander(
            article_text, pagename=title, wikidb=self, lang=self.lang,
            templateprefix=self.templateprefix,
            templateblacklist=self.templateblacklist)
        return template_expander.expandTemplates()

    def getExpandedArticle(self, title):
        return self.expandArticle(self.getRawArticle(title), title)
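
# WPWikiDB is the "wikidb" object that mwlib's expander consults for raw
# article and template text.  A minimal usage sketch (the data file path
# and template prefix here are illustrative, not shipped defaults):
#
#     wikidb = WPWikiDB('es_PE/eswiki.xml.bz2', 'es', 'Plantilla:', set())
#     text = wikidb.getExpandedArticle(u'Portada')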

class WPImageDB:
    """Retrieves images for mwlib."""

    def __init__(self, basepath):
        self.basepath = basepath

    def hashpath(self, name):
        name = name.replace(' ', '_')
        name = name[:1].upper() + name[1:]
        d = md5(name.encode('utf-8')).hexdigest()
        return "/".join([d[0], d[:2], name])

    def hashpath_dir(self, name):
        name = name.replace(' ', '_')
        name = name[:1].upper() + name[1:]
        d = md5(name.encode('utf-8')).hexdigest()
        return "/".join([d[0], d[:2]])

    def getPath(self, name, size=None):
        hashed_name = self.hashpath(name).encode('utf8')
        path = self.basepath + '/%s' % hashed_name
        return path

    def getURL(self, name, size=None):
        hashed_name = self.hashpath(name).encode('utf8')
        if size is not None:
            file_name = self.basepath + self.hashpath_dir(name) + '/' + \
                    ('%dpx-' % size) + name.replace(' ', '_')
        else:
            file_name = self.basepath + self.hashpath_dir(name) + '/' + \
                    name.replace(' ', '_')
        if os.path.exists(file_name):
            url = '/' + file_name
        else:
            if size is None:
                url = 'http://upload.wikimedia.org/wikipedia/commons/' + \
                        hashed_name
            else:
                url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/' \
                        + hashed_name + ('/%dpx-' % size) + \
                        name.replace(' ', '_')
            if re.match(r'.*\.svg$', url, re.IGNORECASE):
                url = url + '.png'
        #print "getURL: %s -> %s" % (name.encode('utf8'), url.encode('utf8'))
        return url


class HTMLOutputBuffer:
    """Buffers output and converts to utf8 as needed."""

    def __init__(self):
        self.buffer = ''

    def write(self, obj):
        if isinstance(obj, unicode):
            self.buffer += obj.encode('utf8')
        else:
            self.buffer += obj

    def getvalue(self):
        return self.buffer


class WPMathRenderer:

    def __init__(self, html_writer):
        self.writer = html_writer

    def render(self, latex):
        logging.debug("MathRenderer %s" % latex)
        latex = latex.replace('\f', '\\f')
        latex = latex.replace('\t', '\\t')
        # \bold gives an error
        latex = latex.replace('\\bold', '')
        # Postpone the rendering: emit the raw TeX in a marker element and
        # process it with javascript at the client side.
        # NOTE: the original markup string was lost in this copy of the
        # file; a "math/tex" script element is assumed here because the
        # page loads MathJax whenever math was emitted.
        mathml = '<script type="math/tex">%s</script>' % latex
        self.writer.math_processed = True
        return mathml


class WPHTMLWriter(mwlib.htmlwriter.HTMLWriter):
    """Customizes HTML output from mwlib."""

    def __init__(self, dataretriever, wfile, images=None, lang='en'):
        self.dataretriever = dataretriever
        self.gallerylevel = 0
        self.lang = lang
        self.math_processed = False
        self.links_list = []
        math_renderer = WPMathRenderer(self)
        mwlib.htmlwriter.HTMLWriter.__init__(self, wfile, images,
                                             math_renderer=math_renderer)

    def writeLink(self, obj):
        if obj.target is None:
            return

        article = obj.target
        #print "writeLink", article, obj.caption
        if article.startswith('#'):
            # A fragment link within the same page.
            # NOTE: the anchor markup here and below was lost in this copy
            # of the file and has been reconstructed.
            self.out.write('<a href="%s">' % article)
        else:
            # Parser appends '/' characters to link targets for some
            # reason.
            article = article.rstrip('/')
            title = article
            title = title[0].capitalize() + title[1:]
            title = title.replace("_", " ")
            self.links_list.append(article)
            parts = article.encode('utf-8').split('#')
            parts[0] = parts[0].replace(" ", "_")
            url = "#".join([x for x in parts])
            self.out.write('<a href="/wiki/%s">' % url)

        if obj.children:
            for x in obj.children:
                self.write(x)
        else:
            self._write(obj.target)

        self.out.write("</a>")
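
    # Link targets are emitted optimistically and collected in links_list;
    # send_links() later checks them against the local database so that
    # client-side script can restyle links whose articles are missing.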

    def writeImageLink(self, obj):
        if self.images is None:
            return

        width = obj.width
        height = obj.height
        is_svg = re.match(r'.*\.svg$', obj.target, re.IGNORECASE)
        is_thumb = obj.thumb or obj.frame or (self.gallerylevel > 0)

        if (width or height) or is_thumb:
            max_length = max(width, height)
            if obj.thumb:
                max_length = 180
            if self.gallerylevel > 0:
                max_length = 120
            path = self.images.getPath(obj.target, size=max_length)
            url_thumb = self.images.getURL(obj.target, size=max_length)
            url = self.images.getURL(obj.target)
        else:
            path = self.images.getPath(obj.target)
            url_thumb = self.images.getURL(obj.target)
            url = url_thumb

        if url_thumb is None:
            return

        # The following HTML generation code is copied closely from
        # InstaView, which seems to approximate the nest of <div> tags
        # needed to render images close to right.
        # It's also been extended to support Gallery tags.
        if self.imglevel == 0:
            self.imglevel += 1

            align = obj.align
            thumb = obj.thumb
            frame = obj.frame
            caption = obj.caption

            # SVG images must be included using <object> rather than
            # <img>.
            if re.match(r'.*\.svg$', url_thumb, re.IGNORECASE):
                tag = 'object'
                ref = 'data'
            else:
                tag = 'img'
                ref = 'src'

            # Hack to get galleries to look okay, in the absence of image
            # dimensions.
            if self.gallerylevel > 0:
                width = 120
            if thumb and not width:
                width = 180  # FIXME: This should not be hardcoded.

            attr = ''
            if width:
                attr += 'width="%d" ' % width

            img = '<%(tag)s %(ref)s="%(url)s" longdesc="%(cap)s" %(att)s>' \
                    % {'tag': tag, 'ref': ref, 'url': url_thumb,
                       'cap': caption, 'att': attr} + \
                    '</%(tag)s>' % {'tag': tag}

            center = False
            if align == 'center':
                center = True
                align = None

            if center:
                self.out.write('<center>')
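
            # Three layout cases follow: a gallery cell, a framed or
            # thumbnail box, and a plain (possibly aligned) image.  The
            # <div> nesting mirrors MediaWiki's generated markup.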
            if self.gallerylevel > 0:
                # NOTE: the literal markup in this branch was lost in this
                # copy of the file; the tags below are a reconstruction
                # modelled on MediaWiki's gallery output.
                self.out.write('<div class="gallerybox">')
                self.out.write('<div class="thumb">')
                self.out.write('<div class="gallery_img">')
                self.out.write('<a href="%s" title="%s">' % (url, caption))
                self.out.write(img)
                self.out.write('</a>')
                self.out.write('</div>')
                self.out.write('</div>')
                self.out.write('<div class="gallerytext">')
                self.out.write('<p>')
                for x in obj.children:
                    self.write(x)
                self.out.write('</p>')
                self.out.write('</div>')
                self.out.write('</div>')
            elif frame or thumb:
                if not align:
                    align = "right"
                # NOTE: as above, the literal markup was lost; MediaWiki's
                # "thumb"/"thumbinner" structure is assumed.
                self.out.write('<div class="thumb t%s">' % align)
                if not width:
                    width = 180  # default thumb width
                self.out.write('<div class="thumbinner" '
                               'style="width: %dpx;">' % (int(width) + 2))
                if thumb:
                    self.out.write(img)
                    self.out.write('<div class="thumbcaption">')
                    self.out.write('<div class="magnify" '
                                   'style="float: right">')
                    self.out.write('<a class="internal">' +
                                   '<img src="/static/magnify-clip.png"/>')
                    self.out.write('</a></div>')
                    self.out.write('<p>')
                    for x in obj.children:
                        self.write(x)
                    self.out.write('</p></div>')
                else:
                    self.out.write(img)
                    self.out.write('<div class="thumbcaption">')
                    for x in obj.children:
                        self.write(x)
                    self.out.write('</div>')
                self.out.write('</div>')
                self.out.write('</div>')
            elif align:
                self.out.write('<div class="float%s">' % align)
                self.out.write(img)
                self.out.write('</div>')
            else:
                self.out.write(img)

            if center:
                self.out.write('</center>')
            self.imglevel -= 1
        else:
            self.out.write('<a href="%s">' % url.encode('utf8'))
            for x in obj.children:
                self.write(x)
            self.out.write('</a>')

    def writeTagNode(self, t):
        if t.caption == 'gallery':
            # NOTE: the table markup was lost in this copy of the file;
            # the row/cell tags are reconstructed.
            self.out.write('<table class="gallery">')
            self.gallerylevel += 1
            # TODO: More than one row.
            self.out.write('<tr>')
            for x in t.children:
                self.out.write('<td>')
                self.write(x)
                self.out.write('</td>')
            self.out.write('</tr>')
            self.gallerylevel -= 1
            self.out.write('</table>')
        else:
            # All others are handled by the base class.
            mwlib.htmlwriter.HTMLWriter.writeTagNode(self, t)


class WikiRequestHandler(SimpleHTTPRequestHandler):

    def __init__(self, wikidb, conf, links_cache, request, client_address,
                 server):
        # pullcord is currently offline
        # self.reporturl = 'pullcord.laptop.org:8000'
        self.reporturl = False
        self.port = conf['port']
        self.lang = conf['lang']
        self.templateprefix = conf['templateprefix']
        self.templateblacklist = set(conf['templateblacklist'])
        self.wpheader = conf['wpheader']
        self.wpfooter = conf['wpfooter']
        self.resultstitle = conf['resultstitle']
        self.base_path = os.path.dirname(conf['path'])
        self.links_cache = links_cache

        if 'editdir' in conf:
            self.editdir = conf['editdir']
        else:
            self.editdir = False

        if 'giturl' in conf:
            self.giturl = conf['giturl']
        else:
            self.giturl = False

        self.wikidb = wikidb
        self.client_address = client_address
        SimpleHTTPRequestHandler.__init__(
            self, request, client_address, server)

    def get_wikitext(self, title):
        article_text = self.wikidb.getRawArticle(title)
        #print article_text

        if self.editdir:
            edited = self.get_editedarticle(title)
            if edited:
                article_text = edited

        # Pass ?override=1 in the url to replace the wikitext with the
        # contents of override.txt, for testing the renderer.
        if self.params.get('override', 0):
            override = codecs.open('override.txt', 'r', 'utf-8')
            article_text = override.read()
            override.close()

        # Pass ?noexpand=1 in the url to disable template expansion.
        if not self.params.get('noexpand', 0) \
                and not self.params.get('edit', 0):
            article_text = self.wikidb.expandArticle(article_text, title)

        return article_text

    def write_wiki_html(self, htmlout, title, article_text):
        tokens = scanner.tokenize(article_text, title)
        wiki_parsed = parser.Parser(tokens, title).parse()
        wiki_parsed.caption = title

        imagedb = WPImageDB(self.base_path + '/images/')
        writer = WPHTMLWriter(self.wikidb.dataretriever, htmlout,
                              images=imagedb, lang=self.lang)
        writer.write(wiki_parsed)
        self.links_cache[title] = writer.links_list
        return writer.math_processed
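
    # send_article() renders /wiki/<title>.  Optional query parameters:
    #   ?raw=1       return wikitext instead of HTML
    #   ?noexpand=1  skip template expansion
    #   ?edit=1      show the wikitext in an editable form (needs editdir)
    #   ?override=1  use the contents of override.txt (renderer testing)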
") htmlout.write(title) htmlout.write(' · ' + self.wpheader + ' ') if self.reporturl: # Report rendering problem. htmlout.write('· Haz clic aquí si esta página contiene ' + 'errores de presentación ') # Report inappropriate content. htmlout.write(' · Esta página contiene material inapropiado' + '') if self.editdir: htmlout.write(' · [ Editar ]') htmlout.write(' · [ Vista OK ]') if self.giturl: htmlout.write(' · [ Historial ]') htmlout.write("") htmlout.write('

') needs_math = self.write_wiki_html(htmlout, title, article_text) if needs_math: # MathJs config htmlout.write('') htmlout.write("") # validate links self.write_process_links_js(htmlout, title) htmlout.write('
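
            # Link validation: the page asks /links/<title> (see
            # send_links() below) for a JSON array of URL-encoded titles
            # missing from the local database; the script emitted here
            # restyles those links and points them at wikipedia.org.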
            self.write_process_links_js(htmlout, title)

            htmlout.write('</div>')
            htmlout.write('<div class="footer">' + self.wpfooter + '</div>')
            htmlout.write("</body>")
            htmlout.write("</html>")

            html = htmlout.getvalue()
            self.wfile.write(html)

    def write_process_links_js(self, htmlout, title):
        """Write javascript that requests the array of external links
        using ajax and compares it with the links in the page; if a link
        is external, change its url and its className."""
        # NOTE: the original script body was lost in this copy of the
        # file; the javascript below reconstructs the behaviour described
        # in the docstring, using the /links/<title> endpoint served by
        # send_links().
        htmlout.write('<script type="text/javascript">'
            'var xhr = new XMLHttpRequest();'
            'xhr.onreadystatechange = function() {'
            '  if (xhr.readyState != 4 || xhr.status != 200) return;'
            '  var external = JSON.parse(xhr.responseText);'
            '  var anchors = document.getElementsByTagName("a");'
            '  for (var i = 0; i < anchors.length; i++) {'
            '    var href = anchors[i].getAttribute("href");'
            '    if (!href || href.indexOf("/wiki/") != 0) continue;'
            '    if (external.indexOf(href.substring(6)) != -1) {'
            '      anchors[i].href = "http://%(lang)s.wikipedia.org/wiki/"'
            '          + href.substring(6);'
            '      anchors[i].className = "external";'
            '    }'
            '  }'
            '};'
            "xhr.open('GET', '/links/%(title)s', true);"
            'xhr.send();'
            '</script>' % {'lang': self.lang,
                           'title': urllib.quote(title.encode('utf8'))})

    def send_links(self, title):
        """Send a json array of strings with the list of urls not
        available in the local database."""
        links = self.links_cache[title]

        # Validate the links.
        external_links = []
        articles_found = \
            self.wikidb.dataretriever.check_existence_list(links)
        for article in links:
            if not dataretriever.normalize_title(article) in articles_found:
                article = article.replace(" ", "_").encode('utf8')
                # Needed to get the same format as the urls in the page,
                # for the comparison done in javascript.
                quoted = urllib.quote(article, safe='~@#$&()*!+=:;,.?/\'')
                external_links.append(quoted)

        self.send_response(200)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.end_headers()
        self.wfile.write(simplejson.dumps(external_links))
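
    # Editing workflow: with 'editdir' configured, a POST to /wiki/<title>
    # stores the edited wikitext under <editdir>/wiki/<title> and keeps a
    # pristine copy under <editdir>/wiki.orig/<title>; get_wikitext()
    # then prefers the edited copy when rendering.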
" + (self.resultstitle % title.encode('utf8')) + "

") self.wfile.write("
") self.wfile.write("") def search(self, article_title): return self.wikidb.dataretriever.search(article_title) def send_image(self, path): if os.path.exists(path.encode('utf8')[1:]): # If image exists locally, serve it as normal. SimpleHTTPRequestHandler.do_GET(self) else: # If not, redirect to wikimedia. redirect_url = "http://upload.wikimedia.org/wikipedia/commons/%s" \ % path.encode('utf8') self.send_response(301) self.send_header("Location", redirect_url.encode('utf8')) self.end_headers() def handle_feedback(self, feedtype, article): with codecs.open("feedback.log", "a", "utf-8") as f: f.write(feedtype + "\t" + article + "\t" + self.client_address[0] + "\n") f.close() self.send_response(200) self.send_header("Content-Type", "text/html; charset=utf-8") self.end_headers() if feedtype == "render": strtype = "un error de presentación" elif feedtype == "report": strtype = "material inapropriado" self.wfile.write("Comentario recibido" + "Gracias por reportar %s en la pagina %s." % (strtype, article.encode('utf8'))) def do_GET(self): real_path = urllib.unquote(self.path) real_path = unicode(real_path, 'utf8') (real_path, sep, param_text) = real_path.partition('?') self.params = {} for p in param_text.split('&'): (key, sep, value) = p.partition('=') self.params[key] = value # Wiki requests return article contents or redirect to Wikipedia. m = re.match(r'^/wiki/(.+)$', real_path) if m: self.send_article(m.group(1)) return # Search requests return search results. m = re.match(r'^/search$', real_path) if m: self.send_searchresult(self.params.get('q', '')) return # Image requests are handled locally or are referenced from Wikipedia. # matches /es_PE/images/, /en_US/images/ etc m = re.match(r'^/\w*/images/(.+)$', real_path) if m: self.send_image(real_path) return # Static requests handed off to SimpleHTTPServer. m = re.match(r'^/(static|generated)/(.*)$', real_path) if m: SimpleHTTPRequestHandler.do_GET(self) return # Handle link validation requests m = re.match(r'^/links/(.*)$', real_path) if m: self.send_links(m.group(1)) return # Feedback links. m = re.match(r'^/(report|render)$', real_path) if m: self.handle_feedback(m.group(1), self.params.get('q', '')) return # Any other request redirects to the index page. 
        self.send_response(301)
        self.send_header("Location", "/static/")
        self.end_headers()


def run_server(confvars):
    if 'editdir' in confvars:
        try:
            for subdir in ['wiki', 'wiki.orig']:
                fdirpath = os.path.join(confvars['editdir'], subdir)
                if not os.path.exists(fdirpath):
                    os.mkdir(fdirpath)
        except OSError:
            logging.error("Error setting up directories:")
            logging.debug("%s must be a writable directory"
                          % confvars['editdir'])

    blacklistpath = os.path.join(os.path.dirname(confvars['path']),
                                 'template_blacklist')
    logging.debug("Reading template_blacklist %s" % blacklistpath)
    blacklist = set()
    if os.path.exists(blacklistpath):
        with open(blacklistpath, 'r') as f:
            for line in f.readlines():
                blacklist.add(line.rstrip().decode('utf8'))
    logging.debug("Read %d blacklisted templates" % len(blacklist))
    confvars['templateblacklist'] = blacklist

    confvars['lang'] = confvars['path'][0:2]
    confvars['flang'] = os.path.basename(confvars['path'])[0:5]

    wikidb = WPWikiDB(confvars['path'], confvars['lang'],
                      confvars['templateprefix'],
                      confvars['templateblacklist'])

    # Remember the link lists of the last few rendered pages, so that
    # /links/<title> requests can be answered without re-rendering.
    links_cache = pylru.lrucache(10)

    httpd = MyHTTPServer(('', confvars['port']),
                         lambda *args: WikiRequestHandler(wikidb, confvars,
                                                          links_cache,
                                                          *args))

    if confvars['comandline']:
        httpd.serve_forever()
    else:
        from threading import Thread
        server = Thread(target=httpd.serve_forever)
        server.setDaemon(True)
        logging.debug("Before starting the server thread")
        server.start()
        logging.debug("After starting the server thread")

    # Tell the world that we're ready to accept requests.
    logging.debug('Ready')


if __name__ == '__main__':
    logging.error("Execute the starting script for your language wikipedia")
    logging.error("Ex: activity_es.py")
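
# run_server() is normally invoked from a per-language launcher such as
# activity_es.py.  A sketch of the expected conf dict -- every value below
# is illustrative, not a shipped default:
#
#     run_server({'path': 'es_PE/eswiki.xml.bz2',
#                 'port': 8000,
#                 'templateprefix': 'Plantilla:',
#                 'wpheader': 'Wikipedia',
#                 'wpfooter': 'Contenido disponible bajo CC BY-SA 3.0',
#                 'resultstitle': 'Resultados para "%s"',
#                 'comandline': True})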