Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/mwlib/_expander.re
diff options
context:
space:
mode:
Diffstat (limited to 'mwlib/_expander.re')
-rw-r--r--mwlib/_expander.re206
1 files changed, 206 insertions, 0 deletions
diff --git a/mwlib/_expander.re b/mwlib/_expander.re
new file mode 100644
index 0000000..7abb2ac
--- /dev/null
+++ b/mwlib/_expander.re
@@ -0,0 +1,206 @@
+// -*- mode: c++ -*-
+// Copyright (c) 2007-2008 PediaPress GmbH
+// See README.txt for additional licensing information.
+
+#include <Python.h>
+
+#include <iostream>
+#include <assert.h>
+#include <vector>
+
+using namespace std;
+
+#define RET(x) {found(x); return x;}
+
+struct Token
+{
+ int type;
+ int start;
+ int len;
+};
+
+
+class MacroScanner
+{
+public:
+
+ MacroScanner(Py_UNICODE *_start, Py_UNICODE *_end) {
+ source = start = _start;
+ end = _end;
+ cursor = start;
+ }
+
+ int found(int val) {
+ if (val==5 && tokens.size()) {
+ Token &previous_token (tokens[tokens.size()-1]);
+ if (previous_token.type==val) {
+ previous_token.len += cursor-start;
+ return tokens.size()-1;
+ }
+ }
+ Token t;
+ t.type = val;
+ t.start = (start-source);
+ t.len = cursor-start;
+ tokens.push_back(t);
+ return tokens.size()-1;
+ }
+
+ inline int scan();
+
+ Py_UNICODE *source;
+
+ Py_UNICODE *start;
+ Py_UNICODE *cursor;
+ Py_UNICODE *end;
+ vector<Token> tokens;
+};
+
+
+int MacroScanner::scan()
+{
+
+std:
+
+ start=cursor;
+
+ Py_UNICODE *marker=cursor;
+
+ Py_UNICODE *save_cursor = cursor;
+
+
+#define YYCTYPE Py_UNICODE
+#define YYCURSOR cursor
+#define YYMARKER marker
+#define YYLIMIT (end)
+// #define YYFILL(n) return 0;
+
+/*!re2c
+re2c:yyfill:enable = 0 ;
+*/
+
+
+
+/*!re2c
+ "{"{2,} {RET(1);}
+ "}"{2,} {RET(2);}
+ "[[" | "]]" {RET(3);}
+ "|" {RET(6);}
+
+ '<noinclude>' {goto noinclude;}
+ '<nowiki>' {goto nowiki;}
+ '<imagemap' [^<>\000]* '>' {goto imagemap;}
+ '<math' [^<>\000]* '>' {goto math;}
+ '<gallery' [^<>\000]* '>' {goto gallery;}
+
+ "<!--[^\000<>]*-->" {RET(5);}
+
+ "\000" {RET(0);}
+ [^\000] {RET(5);}
+
+ */
+
+
+
+noinclude:
+/*!re2c
+ '</noinclude>' {goto std;}
+ [^\000] {goto noinclude;}
+ "\000" {cursor=start+11; RET(5);}
+ */
+
+nowiki:
+/*!re2c
+ '</nowiki>' {RET(5);}
+ [^\000] {goto nowiki;}
+ "\000" {RET(0);}
+ */
+
+math:
+/*!re2c
+ '</math>' {RET(5);}
+ [^\000] {goto math;}
+ "\000" {RET(0);}
+ */
+
+gallery:
+/*!re2c
+ '</gallery>' {RET(5);}
+ [^\000] {goto gallery;}
+ "\000" {RET(0);}
+ */
+
+imagemap:
+/*!re2c
+ '</imagemap>' {RET(5);}
+ [^\000] {goto imagemap;}
+ "\000" {RET(0);}
+ */
+
+pre:
+/*!re2c
+ '</pre>' {RET(5);}
+ [^\000] {goto pre;}
+ "\000" {RET(0);}
+ */
+
+}
+
+
+PyObject *py_scan(PyObject *self, PyObject *args)
+{
+ PyObject *arg1;
+ if (!PyArg_ParseTuple(args, "O:_expander.scan", &arg1)) {
+ return 0;
+ }
+ PyUnicodeObject *unistr = (PyUnicodeObject*)PyUnicode_FromObject(arg1);
+ if (unistr == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "parameter cannot be converted to unicode in _expander.scan");
+ return 0;
+ }
+
+ Py_UNICODE *start = unistr->str;
+ Py_UNICODE *end = start+unistr->length;
+
+
+ MacroScanner scanner (start, end);
+ Py_BEGIN_ALLOW_THREADS
+ while (scanner.scan()) {
+ }
+ Py_END_ALLOW_THREADS
+ Py_XDECREF(unistr);
+
+ // return PyList_New(0); // uncomment to see timings for scanning
+
+ int size = scanner.tokens.size();
+ PyObject *result = PyList_New(size);
+ if (!result) {
+ return 0;
+ }
+
+ for (int i=0; i<size; i++) {
+ Token t = scanner.tokens[i];
+ PyList_SET_ITEM(result, i, Py_BuildValue("iii", t.type, t.start, t.len));
+ }
+
+ return result;
+}
+
+
+
+static PyMethodDef module_functions[] = {
+ {"scan", (PyCFunction)py_scan, METH_VARARGS, "scan(text)"},
+ {0, 0},
+};
+
+
+
+extern "C" {
+ DL_EXPORT(void) init_expander();
+}
+
+DL_EXPORT(void) init_expander()
+{
+ /*PyObject *m =*/ Py_InitModule("_expander", module_functions);
+}