Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTomeu Vizoso <tomeu@tomeuvizoso.net>2008-09-26 15:48:39 (GMT)
committer Tomeu Vizoso <tomeu@tomeuvizoso.net>2008-09-26 15:48:39 (GMT)
commitb905626986ac003db909ff28f7bcfd7fcd4eeca0 (patch)
tree7f2f0d6283ce37194067a1eaab37191b43109b7d
parent96564f6bb56079cc1eccd0230057f753fce7f6f4 (diff)
Implement metadata reading in C for speed
-rw-r--r--Makefile.am3
-rwxr-xr-xautogen.sh2
-rw-r--r--configure.ac5
-rw-r--r--m4/python.m462
-rw-r--r--src/olpc/datastore/Makefile.am13
-rw-r--r--src/olpc/datastore/layoutmanager.py2
-rw-r--r--src/olpc/datastore/metadatareader.c199
-rw-r--r--src/olpc/datastore/metadatastore.py37
-rw-r--r--tests/test_perf.py2
9 files changed, 288 insertions, 37 deletions
diff --git a/Makefile.am b/Makefile.am
index 9afc54b..5fa2790 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,3 +1,5 @@
+ACLOCAL_AMFLAGS = -I m4
+
SUBDIRS = bin etc src
test:
@@ -5,3 +7,4 @@ test:
$(MAKE) -C tests test
EXTRA_DIST = README.txt LICENSE.GPL
+
diff --git a/autogen.sh b/autogen.sh
index 9bd6fd0..1cd5db4 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,3 +1,5 @@
#!/bin/sh
+export ACLOCAL="aclocal -I m4"
+
autoreconf -i
./configure "$@"
diff --git a/configure.ac b/configure.ac
index 65fdcf1..dda5092 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,11 +2,16 @@ AC_INIT([sugar-datastore],[0.8.0],[],[sugar-datastore])
AC_PREREQ([2.59])
+AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_SRCDIR([configure.ac])
AM_INIT_AUTOMAKE([1.9 foreign dist-bzip2 no-dist-gzip])
+AC_DISABLE_STATIC
+AC_PROG_LIBTOOL
+
AM_PATH_PYTHON
+AM_CHECK_PYTHON_HEADERS(,[AC_MSG_ERROR(could not find Python headers)])
AC_OUTPUT([
Makefile
diff --git a/m4/python.m4 b/m4/python.m4
new file mode 100644
index 0000000..e1c5266
--- /dev/null
+++ b/m4/python.m4
@@ -0,0 +1,62 @@
+## this one is commonly used with AM_PATH_PYTHONDIR ...
+dnl AM_CHECK_PYMOD(MODNAME [,SYMBOL [,ACTION-IF-FOUND [,ACTION-IF-NOT-FOUND]]])
+dnl Check if a module containing a given symbol is visible to python.
+dnl
+dnl How it works:
+dnl  - A cache variable name py_cv_mod_NAME is derived from MODNAME and
+dnl    SYMBOL joined by an underscore, with sed mapping . / and - to an
+dnl    underscore and + to the letter p.
+dnl  - Without SYMBOL the probe script imports MODNAME and exits with
+dnl    status 1 only on ImportError. Any other exception raised by the
+dnl    import still exits with status 0 and so counts as found.
+dnl  - With SYMBOL the probe script imports MODNAME and evaluates
+dnl    MODNAME.SYMBOL, so any failure gives a non-zero exit status.
+dnl  - The probe is run with the interpreter in PYTHON and both of its
+dnl    output streams are redirected to AC_FD_CC.
+dnl  - The yes or no outcome is stored through AC_CACHE_VAL, so a cached
+dnl    value skips the probe.
+dnl  - ACTION-IF-FOUND runs whenever the stored value is anything other
+dnl    than no, otherwise ACTION-IF-NOT-FOUND runs.
+AC_DEFUN([AM_CHECK_PYMOD],
+[AC_REQUIRE([AM_PATH_PYTHON])
+py_mod_var=`echo $1['_']$2 | sed 'y%./+-%__p_%'`
+AC_MSG_CHECKING(for ifelse([$2],[],,[$2 in ])python module $1)
+AC_CACHE_VAL(py_cv_mod_$py_mod_var, [
+ifelse([$2],[], [prog="
+import sys
+try:
+ import $1
+except ImportError:
+ sys.exit(1)
+except:
+ sys.exit(0)
+sys.exit(0)"], [prog="
+import $1
+$1.$2"])
+if $PYTHON -c "$prog" 1>&AC_FD_CC 2>&AC_FD_CC
+ then
+ eval "py_cv_mod_$py_mod_var=yes"
+ else
+ eval "py_cv_mod_$py_mod_var=no"
+ fi
+])
+py_val=`eval "echo \`echo '$py_cv_mod_'$py_mod_var\`"`
+if test "x$py_val" != xno; then
+ AC_MSG_RESULT(yes)
+ ifelse([$3], [],, [$3
+])dnl
+else
+ AC_MSG_RESULT(no)
+ ifelse([$4], [],, [$4
+])dnl
+fi
+])
+
+dnl a macro to check for ability to create python extensions
+dnl AM_CHECK_PYTHON_HEADERS([ACTION-IF-POSSIBLE], [ACTION-IF-NOT-POSSIBLE])
+dnl function also defines PYTHON_INCLUDES
+dnl
+dnl How it works:
+dnl  - sys.prefix and sys.exec_prefix are queried by running the
+dnl    interpreter in PYTHON. The one-liners use the print statement
+dnl    form, so that interpreter has to accept Python 2 syntax.
+dnl  - PYTHON_INCLUDES is set to a -I flag for
+dnl    PREFIX/include/pythonVERSION, where VERSION is PYTHON_VERSION.
+dnl    A second -I flag for the exec_prefix tree is appended only when
+dnl    the two prefixes differ.
+dnl  - PYTHON_INCLUDES is exported to the generated files with AC_SUBST.
+dnl  - CPPFLAGS is extended with PYTHON_INCLUDES only while the
+dnl    preprocessor test for Python.h runs and is restored afterwards.
+dnl  - ACTION-IF-POSSIBLE runs when Python.h preprocesses cleanly,
+dnl    otherwise ACTION-IF-NOT-POSSIBLE runs.
+AC_DEFUN([AM_CHECK_PYTHON_HEADERS],
+[AC_REQUIRE([AM_PATH_PYTHON])
+AC_MSG_CHECKING(for headers required to compile python extensions)
+dnl deduce PYTHON_INCLUDES
+py_prefix=`$PYTHON -c "import sys; print sys.prefix"`
+py_exec_prefix=`$PYTHON -c "import sys; print sys.exec_prefix"`
+PYTHON_INCLUDES="-I${py_prefix}/include/python${PYTHON_VERSION}"
+if test "$py_prefix" != "$py_exec_prefix"; then
+ PYTHON_INCLUDES="$PYTHON_INCLUDES -I${py_exec_prefix}/include/python${PYTHON_VERSION}"
+fi
+AC_SUBST(PYTHON_INCLUDES)
+dnl check if the headers exist:
+save_CPPFLAGS="$CPPFLAGS"
+CPPFLAGS="$CPPFLAGS $PYTHON_INCLUDES"
+AC_TRY_CPP([#include <Python.h>],dnl
+[AC_MSG_RESULT(found)
+$1],dnl
+[AC_MSG_RESULT(not found)
+$2])
+CPPFLAGS="$save_CPPFLAGS"
+])
diff --git a/src/olpc/datastore/Makefile.am b/src/olpc/datastore/Makefile.am
index 78f2d03..43b6d14 100644
--- a/src/olpc/datastore/Makefile.am
+++ b/src/olpc/datastore/Makefile.am
@@ -6,3 +6,16 @@ datastore_PYTHON = \
indexstore.py \
layoutmanager.py \
metadatastore.py
+
+AM_CPPFLAGS = \
+ $(WARN_CFLAGS) \
+ $(EXT_CFLAGS) \
+ $(PYTHON_INCLUDES)
+
+AM_LDFLAGS = -module -avoid-version
+
+pkgpyexecdir = $(pythondir)/olpc/datastore
+pkgpyexec_LTLIBRARIES = metadatareader.la
+
+metadatareader_la_SOURCES = \
+ metadatareader.c
diff --git a/src/olpc/datastore/layoutmanager.py b/src/olpc/datastore/layoutmanager.py
index db91690..a01139e 100644
--- a/src/olpc/datastore/layoutmanager.py
+++ b/src/olpc/datastore/layoutmanager.py
@@ -21,7 +21,7 @@ class LayoutManager(object):
os.makedirs(path)
def get_entry_path(self, uid):
- return os.path.join(self._root_path, uid[:2], uid)
+ return '%s/%s/%s' % (self._root_path, uid[:2], uid)
def get_index_path(self):
return os.path.join(self._root_path, 'index')
diff --git a/src/olpc/datastore/metadatareader.c b/src/olpc/datastore/metadatareader.c
new file mode 100644
index 0000000..ce6d38e
--- /dev/null
+++ b/src/olpc/datastore/metadatareader.c
@@ -0,0 +1,199 @@
+#include "Python.h"
+
+#include <dirent.h>
+
+// TODO: put it in a place where python can use it when writing metadata
+// Largest property file, in bytes, that retrieve() is willing to load.
+// The expression is parenthesized so the macro expands correctly inside
+// any surrounding expression (e.g. next to '/', '%' or a cast).
+#define MAX_PROPERTY_LENGTH (500 * 1024)
+
+// The dbus.ByteArray type object. It is looked up once in
+// initmetadatareader() and called by metadatareader_retrieve() to wrap the
+// contents of every non-empty property file.
+static PyObject *byte_array_type = NULL;
+
+/*
+ * retrieve(dir_path, properties) -> dict
+ *
+ * Read the property files found in <dir_path>/metadata and return them as a
+ * new dictionary keyed by file name.
+ *
+ * dir_path    string, path of the entry directory.
+ * properties  None or a list of property names. None or an empty list
+ *             selects every file in the metadata directory; otherwise only
+ *             files whose name appears in the list are read.
+ *
+ * Empty files map to the empty string; every other file is wrapped in
+ * dbus.ByteArray.
+ *
+ * Raises IOError when the metadata directory or a property file cannot be
+ * opened or read, ValueError when a property file is larger than
+ * MAX_PROPERTY_LENGTH or a value cannot be converted, TypeError when
+ * properties is neither None nor a list, and MemoryError on allocation
+ * failure.
+ *
+ * Every exit path goes through the single cleanup label, which releases all
+ * buffers, handles and references still owned by the function.
+ */
+static PyObject *
+metadatareader_retrieve(PyObject *unused, PyObject *args)
+{
+    PyObject *result = NULL;
+    PyObject *dict = NULL;
+    PyObject *properties = NULL;
+    PyObject *value = NULL;
+    PyObject *raw = NULL;
+    const char *dir_path = NULL;
+    char *metadata_path = NULL;
+    size_t metadata_path_size;
+    DIR *dir_stream = NULL;
+    struct dirent *dir_entry = NULL;
+    char *file_path = NULL;
+    FILE *file = NULL;
+    char *value_buf = NULL;
+    int n_properties = 0;
+
+    if (!PyArg_ParseTuple(args, "sO:retrieve", &dir_path, &properties))
+        return NULL;
+
+    // Validate the filter up front: PyList_Size() on a non-list sets an
+    // error and returns -1, which must not be silently ignored.
+    if (properties != Py_None) {
+        if (!PyList_Check(properties)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "properties must be a list or None");
+            return NULL;
+        }
+        n_properties = (int) PyList_Size(properties);
+    }
+
+    // Build path to the metadata directory
+    metadata_path_size = strlen(dir_path) + strlen("/metadata") + 1;
+    metadata_path = PyMem_Malloc(metadata_path_size);
+    if (metadata_path == NULL) {
+        PyErr_NoMemory();
+        goto cleanup;
+    }
+    snprintf(metadata_path, metadata_path_size, "%s/%s", dir_path, "metadata");
+
+    dir_stream = opendir(metadata_path);
+    if (dir_stream == NULL) {
+        PyErr_Format(PyExc_IOError, "Couldn't open metadata directory %s",
+                     metadata_path);
+        goto cleanup;
+    }
+
+    dict = PyDict_New();
+    if (dict == NULL)
+        goto cleanup;
+
+    while ((dir_entry = readdir(dir_stream)) != NULL) {
+        const char *name = dir_entry->d_name;
+        size_t file_path_size;
+        long file_size;
+
+        // Skip . and ..
+        if (!strcmp(name, ".") || !strcmp(name, ".."))
+            continue;
+
+        // Check if the property is in the properties list
+        if (n_properties > 0) {
+            int found = 0;
+            int i;
+            for (i = 0; i < n_properties && !found; i++) {
+                // Borrowed reference, no DECREF needed
+                PyObject *property = PyList_GetItem(properties, i);
+                const char *wanted;
+                if (property == NULL)
+                    goto cleanup;
+                // Returns NULL with TypeError set for non-string items
+                wanted = PyString_AsString(property);
+                if (wanted == NULL)
+                    goto cleanup;
+                found = !strcmp(name, wanted);
+            }
+            if (!found)
+                continue;
+        }
+
+        // Build path of the property file
+        file_path_size = strlen(metadata_path) + 1 + strlen(name) + 1;
+        file_path = PyMem_Malloc(file_path_size);
+        if (file_path == NULL) {
+            PyErr_NoMemory();
+            goto cleanup;
+        }
+        snprintf(file_path, file_path_size, "%s/%s", metadata_path, name);
+
+        file = fopen(file_path, "r");
+        if (file == NULL) {
+            PyErr_Format(PyExc_IOError, "Cannot open property file %s: %s",
+                         file_path, strerror(errno));
+            goto cleanup;
+        }
+
+        // Get file size; ftell() reports failure as -1
+        if (fseek(file, 0, SEEK_END) != 0 || (file_size = ftell(file)) < 0) {
+            PyErr_Format(PyExc_IOError,
+                         "Error while reading property file %s", file_path);
+            goto cleanup;
+        }
+        rewind(file);
+
+        if (file_size == 0) {
+            // Empty property
+            value = PyString_FromString("");
+            if (value == NULL) {
+                PyErr_SetString(PyExc_ValueError,
+                                "Failed to convert value to python string");
+                goto cleanup;
+            }
+        } else {
+            if (file_size > MAX_PROPERTY_LENGTH) {
+                PyErr_SetString(PyExc_ValueError, "Property file too big");
+                goto cleanup;
+            }
+
+            // Read the whole file
+            value_buf = PyMem_Malloc((size_t) file_size);
+            if (value_buf == NULL) {
+                PyErr_NoMemory();
+                goto cleanup;
+            }
+            if (fread(value_buf, 1, (size_t) file_size, file) <
+                    (size_t) file_size) {
+                PyErr_Format(PyExc_IOError,
+                             "Error while reading property file %s",
+                             file_path);
+                goto cleanup;
+            }
+
+            // Convert value to dbus.ByteArray
+            if (byte_array_type == NULL) {
+                PyErr_SetString(PyExc_RuntimeError,
+                                "dbus.ByteArray is not available");
+                goto cleanup;
+            }
+            // A prototyped call converts the length argument properly,
+            // unlike passing a long to the "s#" varargs format.
+            raw = PyString_FromStringAndSize(value_buf, file_size);
+            if (raw == NULL)
+                goto cleanup;
+            value = PyObject_CallFunctionObjArgs(byte_array_type, raw, NULL);
+            Py_DECREF(raw);
+            raw = NULL;
+            if (value == NULL) {
+                PyErr_SetString(PyExc_ValueError,
+                                "Failed to convert value to dbus.ByteArray");
+                goto cleanup;
+            }
+        }
+
+        // Add property to the metadata dict
+        if (PyDict_SetItemString(dict, name, value) == -1) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Failed to add property to dictionary");
+            goto cleanup;
+        }
+        // The dict took its own reference; drop ours
+        Py_DECREF(value);
+        value = NULL;
+
+        // Release the per-property resources before the next entry
+        PyMem_Free(file_path);
+        file_path = NULL;
+        fclose(file);
+        file = NULL;
+        PyMem_Free(value_buf);
+        value_buf = NULL;
+    }
+
+    // Success: hand ownership of the dict to the caller
+    result = dict;
+    dict = NULL;
+
+cleanup:
+    Py_XDECREF(raw);
+    Py_XDECREF(value);
+    Py_XDECREF(dict);
+    if (file) {
+        fclose(file);
+    }
+    if (dir_stream) {
+        closedir(dir_stream);
+    }
+    // PyMem_Free() accepts NULL
+    PyMem_Free(value_buf);
+    PyMem_Free(file_path);
+    PyMem_Free(metadata_path);
+    return result;
+}
+
+// Method table of the module: it exports the single function "retrieve",
+// called with positional arguments (METH_VARARGS), followed by the all-NULL
+// sentinel entry that terminates the table.
+// NOTE(review): the docstring says "from a file", but
+// metadatareader_retrieve() reads the files of a metadata directory.
+static PyMethodDef metadatareader_functions[] = {
+ {"retrieve", metadatareader_retrieve, METH_VARARGS, PyDoc_STR("Read a dictionary from a file")},
+ {NULL, NULL, 0, NULL}
+};
+
+/*
+ * Module initialisation: registers the "metadatareader" module and caches
+ * the dbus.ByteArray type in byte_array_type.
+ *
+ * On any failure the function returns with the Python exception left set,
+ * so the import of the module fails with that exception instead of
+ * crashing on a NULL pointer.
+ */
+PyMODINIT_FUNC
+initmetadatareader(void)
+{
+    PyObject *mod;
+    PyObject *dbus_module;
+
+    mod = Py_InitModule("metadatareader", metadatareader_functions);
+    if (mod == NULL)
+        return;
+
+    dbus_module = PyImport_ImportModule("dbus");
+    if (dbus_module == NULL)
+        return;
+
+    // byte_array_type keeps the new reference for the life of the module;
+    // the module object itself is no longer needed.
+    byte_array_type = PyObject_GetAttrString(dbus_module, "ByteArray");
+    Py_DECREF(dbus_module);
+}
+
diff --git a/src/olpc/datastore/metadatastore.py b/src/olpc/datastore/metadatastore.py
index 810ccb8..4f663a7 100644
--- a/src/olpc/datastore/metadatastore.py
+++ b/src/olpc/datastore/metadatastore.py
@@ -3,6 +3,7 @@ import logging
import errno
from olpc.datastore import layoutmanager
+from olpc.datastore import metadatareader
MAX_SIZE = 256
@@ -26,30 +27,7 @@ class MetadataStore(object):
def retrieve(self, uid, properties=None):
dir_path = layoutmanager.get_instance().get_entry_path(uid)
- if not os.path.exists(dir_path):
- raise ValueError('Unknown object: %r' % uid)
-
- metadata_path = os.path.join(dir_path, 'metadata')
- metadata = {}
- if properties is None or not properties:
- properties = os.listdir(metadata_path)
-
- for key in properties:
- property_path = metadata_path + '/' + key
- try:
- value = open(property_path, 'r').read()
- except IOError, e:
- if e.errno != errno.ENOENT:
- raise
- else:
- if not value:
- metadata[key] = ''
- else:
- # TODO: This class shouldn't know anything about dbus.
- import dbus
- metadata[key] = dbus.ByteArray(value)
-
- return metadata
+ return metadatareader.retrieve(dir_path, properties)
def delete(self, uid):
dir_path = layoutmanager.get_instance().get_entry_path(uid)
@@ -58,14 +36,3 @@ class MetadataStore(object):
os.remove(os.path.join(metadata_path, key))
os.rmdir(metadata_path)
- def _cast_for_journal(self, key, value):
- # Hack because the current journal expects these properties to have some
- # predefined types
- if key in ['timestamp', 'keep']:
- try:
- return int(value)
- except ValueError:
- return value
- else:
- return value
-
diff --git a/tests/test_perf.py b/tests/test_perf.py
index 89996ac..d1e0269 100644
--- a/tests/test_perf.py
+++ b/tests/test_perf.py
@@ -66,7 +66,7 @@ data_store = dbus.Interface(proxy, DS_DBUS_INTERFACE)
uids = []
-n = 1000
+n = 100
total = 0
print 'Creating %r entries' % n