diff options
author | Tomeu Vizoso <tomeu@tomeuvizoso.net> | 2008-09-26 15:48:39 (GMT) |
---|---|---|
committer | Tomeu Vizoso <tomeu@tomeuvizoso.net> | 2008-09-26 15:48:39 (GMT) |
commit | b905626986ac003db909ff28f7bcfd7fcd4eeca0 (patch) | |
tree | 7f2f0d6283ce37194067a1eaab37191b43109b7d | |
parent | 96564f6bb56079cc1eccd0230057f753fce7f6f4 (diff) |
Implement metadata reading in C for speed
-rw-r--r-- | Makefile.am | 3 | ||||
-rwxr-xr-x | autogen.sh | 2 | ||||
-rw-r--r-- | configure.ac | 5 | ||||
-rw-r--r-- | m4/python.m4 | 62 | ||||
-rw-r--r-- | src/olpc/datastore/Makefile.am | 13 | ||||
-rw-r--r-- | src/olpc/datastore/layoutmanager.py | 2 | ||||
-rw-r--r-- | src/olpc/datastore/metadatareader.c | 199 | ||||
-rw-r--r-- | src/olpc/datastore/metadatastore.py | 37 | ||||
-rw-r--r-- | tests/test_perf.py | 2 |
9 files changed, 288 insertions, 37 deletions
diff --git a/Makefile.am b/Makefile.am index 9afc54b..5fa2790 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,3 +1,5 @@ +ACLOCAL_AMFLAGS = -I m4 + SUBDIRS = bin etc src test: @@ -5,3 +7,4 @@ test: $(MAKE) -C tests test EXTRA_DIST = README.txt LICENSE.GPL + @@ -1,3 +1,5 @@ #!/bin/sh +export ACLOCAL="aclocal -I m4" + autoreconf -i ./configure "$@" diff --git a/configure.ac b/configure.ac index 65fdcf1..dda5092 100644 --- a/configure.ac +++ b/configure.ac @@ -2,11 +2,16 @@ AC_INIT([sugar-datastore],[0.8.0],[],[sugar-datastore]) AC_PREREQ([2.59]) +AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_SRCDIR([configure.ac]) AM_INIT_AUTOMAKE([1.9 foreign dist-bzip2 no-dist-gzip]) +AC_DISABLE_STATIC +AC_PROG_LIBTOOL + AM_PATH_PYTHON +AM_CHECK_PYTHON_HEADERS(,[AC_MSG_ERROR(could not find Python headers)]) AC_OUTPUT([ Makefile diff --git a/m4/python.m4 b/m4/python.m4 new file mode 100644 index 0000000..e1c5266 --- /dev/null +++ b/m4/python.m4 @@ -0,0 +1,62 @@ +## this one is commonly used with AM_PATH_PYTHONDIR ... +dnl AM_CHECK_PYMOD(MODNAME [,SYMBOL [,ACTION-IF-FOUND [,ACTION-IF-NOT-FOUND]]]) +dnl Check if a module containing a given symbol is visible to python. 
+AC_DEFUN([AM_CHECK_PYMOD], +[AC_REQUIRE([AM_PATH_PYTHON]) +py_mod_var=`echo $1['_']$2 | sed 'y%./+-%__p_%'` +AC_MSG_CHECKING(for ifelse([$2],[],,[$2 in ])python module $1) +AC_CACHE_VAL(py_cv_mod_$py_mod_var, [ +ifelse([$2],[], [prog=" +import sys +try: + import $1 +except ImportError: + sys.exit(1) +except: + sys.exit(0) +sys.exit(0)"], [prog=" +import $1 +$1.$2"]) +if $PYTHON -c "$prog" 1>&AC_FD_CC 2>&AC_FD_CC + then + eval "py_cv_mod_$py_mod_var=yes" + else + eval "py_cv_mod_$py_mod_var=no" + fi +]) +py_val=`eval "echo \`echo '$py_cv_mod_'$py_mod_var\`"` +if test "x$py_val" != xno; then + AC_MSG_RESULT(yes) + ifelse([$3], [],, [$3 +])dnl +else + AC_MSG_RESULT(no) + ifelse([$4], [],, [$4 +])dnl +fi +]) + +dnl a macro to check for ability to create python extensions +dnl AM_CHECK_PYTHON_HEADERS([ACTION-IF-POSSIBLE], [ACTION-IF-NOT-POSSIBLE]) +dnl function also defines PYTHON_INCLUDES +AC_DEFUN([AM_CHECK_PYTHON_HEADERS], +[AC_REQUIRE([AM_PATH_PYTHON]) +AC_MSG_CHECKING(for headers required to compile python extensions) +dnl deduce PYTHON_INCLUDES +py_prefix=`$PYTHON -c "import sys; print sys.prefix"` +py_exec_prefix=`$PYTHON -c "import sys; print sys.exec_prefix"` +PYTHON_INCLUDES="-I${py_prefix}/include/python${PYTHON_VERSION}" +if test "$py_prefix" != "$py_exec_prefix"; then + PYTHON_INCLUDES="$PYTHON_INCLUDES -I${py_exec_prefix}/include/python${PYTHON_VERSION}" +fi +AC_SUBST(PYTHON_INCLUDES) +dnl check if the headers exist: +save_CPPFLAGS="$CPPFLAGS" +CPPFLAGS="$CPPFLAGS $PYTHON_INCLUDES" +AC_TRY_CPP([#include <Python.h>],dnl +[AC_MSG_RESULT(found) +$1],dnl +[AC_MSG_RESULT(not found) +$2]) +CPPFLAGS="$save_CPPFLAGS" +]) diff --git a/src/olpc/datastore/Makefile.am b/src/olpc/datastore/Makefile.am index 78f2d03..43b6d14 100644 --- a/src/olpc/datastore/Makefile.am +++ b/src/olpc/datastore/Makefile.am @@ -6,3 +6,16 @@ datastore_PYTHON = \ indexstore.py \ layoutmanager.py \ metadatastore.py + +AM_CPPFLAGS = \ + $(WARN_CFLAGS) \ + $(EXT_CFLAGS) \ + $(PYTHON_INCLUDES) 
+
+AM_LDFLAGS = -module -avoid-version
+
+pkgpyexecdir = $(pythondir)/olpc/datastore
+pkgpyexec_LTLIBRARIES = metadatareader.la
+
+metadatareader_la_SOURCES = \
+    metadatareader.c
diff --git a/src/olpc/datastore/layoutmanager.py b/src/olpc/datastore/layoutmanager.py
index db91690..a01139e 100644
--- a/src/olpc/datastore/layoutmanager.py
+++ b/src/olpc/datastore/layoutmanager.py
@@ -21,7 +21,7 @@ class LayoutManager(object):
             os.makedirs(path)
 
     def get_entry_path(self, uid):
-        return os.path.join(self._root_path, uid[:2], uid)
+        return '%s/%s/%s' % (self._root_path, uid[:2], uid)
 
     def get_index_path(self):
         return os.path.join(self._root_path, 'index')
diff --git a/src/olpc/datastore/metadatareader.c b/src/olpc/datastore/metadatareader.c
new file mode 100644
index 0000000..ce6d38e
--- /dev/null
+++ b/src/olpc/datastore/metadatareader.c
@@ -0,0 +1,258 @@
+#include "Python.h"
+
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+// TODO: put it in a place where python can use it when writing metadata
+// Parenthesized so that the macro expands safely inside any expression.
+#define MAX_PROPERTY_LENGTH (500 * 1024)
+
+// dbus.ByteArray type object, looked up once in initmetadatareader().
+static PyObject *byte_array_type = NULL;
+
+// Tell whether name is one of the strings in the properties list.
+// Returns 1 if it is, 0 if it is not and -1, with a Python exception set,
+// if an item of the list cannot be read as a string.
+static int
+property_is_requested(PyObject *properties, const char *name)
+{
+    Py_ssize_t count = PyList_GET_SIZE(properties);
+    Py_ssize_t i;
+
+    for (i = 0; i < count; i++) {
+        // PyList_GET_ITEM returns a borrowed reference: nothing to release
+        PyObject *item = PyList_GET_ITEM(properties, i);
+        const char *wanted = PyString_AsString(item);
+        if (wanted == NULL)
+            return -1;
+        if (strcmp(name, wanted) == 0)
+            return 1;
+    }
+    return 0;
+}
+
+// retrieve(dir_path, properties) -> dict
+//
+// Read every file in <dir_path>/metadata and return a dict that maps each
+// file name to its content: '' for an empty file, a dbus.ByteArray
+// otherwise. When properties is a non-empty list, only the files named in
+// it are read; None or an empty list selects all of them.
+//
+// Raises TypeError if properties is neither None nor a list, IOError if the
+// directory or one of the files cannot be read and ValueError if a file is
+// bigger than MAX_PROPERTY_LENGTH or its content cannot be converted.
+static PyObject *
+metadatareader_retrieve(PyObject *unused, PyObject *args)
+{
+    PyObject *dict = NULL;
+    PyObject *properties = NULL;
+    PyObject *value = NULL;
+    PyObject *call_args = NULL;
+    const char *dir_path = NULL;
+    char *metadata_path = NULL;
+    size_t metadata_path_size;
+    DIR *dir_stream = NULL;
+    struct dirent *dir_entry = NULL;
+    char *file_path = NULL;
+    size_t file_path_size;
+    FILE *file = NULL;
+    char *value_buf = NULL;
+    long file_size;
+    int filter = 0;
+
+    if (!PyArg_ParseTuple(args, "sO:retrieve", &dir_path, &properties))
+        return NULL;
+
+    // PyList_Size() fails with a pending exception on anything that is not
+    // a list, so reject other types before looking at the length
+    if (properties != Py_None) {
+        if (!PyList_Check(properties)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "properties must be a list or None");
+            return NULL;
+        }
+        filter = PyList_GET_SIZE(properties) > 0;
+    }
+
+    // Build path to the metadata directory; sizeof counts the trailing NUL
+    metadata_path_size = strlen(dir_path) + sizeof("/metadata");
+    metadata_path = PyMem_Malloc(metadata_path_size);
+    if (metadata_path == NULL) {
+        PyErr_NoMemory();
+        goto cleanup;
+    }
+    snprintf(metadata_path, metadata_path_size, "%s/metadata", dir_path);
+
+    dir_stream = opendir(metadata_path);
+    if (dir_stream == NULL) {
+        char buf[256];
+        snprintf(buf, sizeof(buf), "Couldn't open metadata directory %s",
+                 metadata_path);
+        PyErr_SetString(PyExc_IOError, buf);
+        goto cleanup;
+    }
+
+    dict = PyDict_New();
+    if (dict == NULL)
+        goto cleanup;
+
+    while ((dir_entry = readdir(dir_stream)) != NULL) {
+        const char *name = dir_entry->d_name;
+
+        // Skip . and ..
+        if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+            continue;
+
+        // Check if the property is in the properties list
+        if (filter) {
+            int requested = property_is_requested(properties, name);
+            if (requested < 0)
+                goto cleanup;
+            if (!requested)
+                continue;
+        }
+
+        // Build path of the property file
+        file_path_size = strlen(metadata_path) + 1 + strlen(name) + 1;
+        file_path = PyMem_Malloc(file_path_size);
+        if (file_path == NULL) {
+            PyErr_NoMemory();
+            goto cleanup;
+        }
+        snprintf(file_path, file_path_size, "%s/%s", metadata_path, name);
+
+        // Property values are raw bytes, so read them in binary mode
+        file = fopen(file_path, "rb");
+        if (file == NULL) {
+            char buf[256];
+            snprintf(buf, sizeof(buf), "Cannot open property file %s: %s",
+                     file_path, strerror(errno));
+            PyErr_SetString(PyExc_IOError, buf);
+            goto cleanup;
+        }
+
+        // Get file size; ftell() returns -1 on error
+        file_size = -1;
+        if (fseek(file, 0, SEEK_END) == 0)
+            file_size = ftell(file);
+        if (file_size < 0) {
+            char buf[256];
+            snprintf(buf, sizeof(buf),
+                     "Cannot get size of property file %s: %s",
+                     file_path, strerror(errno));
+            PyErr_SetString(PyExc_IOError, buf);
+            goto cleanup;
+        }
+        rewind(file);
+
+        if (file_size == 0) {
+            // Empty property
+            value = PyString_FromString("");
+            if (value == NULL) {
+                PyErr_SetString(PyExc_ValueError,
+                                "Failed to convert value to python string");
+                goto cleanup;
+            }
+        } else {
+            size_t read_size;
+
+            if (file_size > MAX_PROPERTY_LENGTH) {
+                PyErr_SetString(PyExc_ValueError, "Property file too big");
+                goto cleanup;
+            }
+
+            // Read the whole file
+            value_buf = PyMem_Malloc(file_size);
+            if (value_buf == NULL) {
+                PyErr_NoMemory();
+                goto cleanup;
+            }
+            read_size = fread(value_buf, 1, file_size, file);
+            if (read_size < (size_t) file_size) {
+                char buf[256];
+                snprintf(buf, sizeof(buf),
+                         "Error while reading property file %s", file_path);
+                PyErr_SetString(PyExc_IOError, buf);
+                goto cleanup;
+            }
+
+            // Convert value to dbus.ByteArray. "s#" reads its length as an
+            // int; the cast is safe because of the size check above
+            call_args = Py_BuildValue("(s#)", value_buf, (int) file_size);
+            if (call_args == NULL)
+                goto cleanup;
+            value = PyObject_CallObject(byte_array_type, call_args);
+            Py_DECREF(call_args);
+            call_args = NULL;
+            if (value == NULL) {
+                PyErr_SetString(PyExc_ValueError,
+                                "Failed to convert value to dbus.ByteArray");
+                goto cleanup;
+            }
+        }
+
+        // Add property to the metadata dict
+        if (PyDict_SetItemString(dict, name, value) == -1) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Failed to add property to dictionary");
+            goto cleanup;
+        }
+
+        // The dict took its own reference to value, drop ours
+        Py_DECREF(value);
+        value = NULL;
+
+        // Release what was acquired for this property
+        PyMem_Free(file_path);
+        file_path = NULL;
+        fclose(file);
+        file = NULL;
+        PyMem_Free(value_buf);
+        value_buf = NULL;
+    }
+
+    closedir(dir_stream);
+    PyMem_Free(metadata_path);
+
+    return dict;
+
+cleanup:
+    // PyMem_Free() and Py_XDECREF() both accept NULL
+    Py_XDECREF(value);
+    Py_XDECREF(dict);
+    PyMem_Free(value_buf);
+    PyMem_Free(file_path);
+    PyMem_Free(metadata_path);
+    if (file) {
+        fclose(file);
+    }
+    if (dir_stream) {
+        closedir(dir_stream);
+    }
+    return NULL;
+}
+
+static PyMethodDef metadatareader_functions[] = {
+    {"retrieve", metadatareader_retrieve, METH_VARARGS,
+     PyDoc_STR("Read the metadata of an entry into a dictionary")},
+    {NULL, NULL, 0, NULL}
+};
+
+PyMODINIT_FUNC
+initmetadatareader(void)
+{
+    PyObject *dbus_module;
+
+    if (Py_InitModule("metadatareader", metadatareader_functions) == NULL)
+        return;
+
+    // On failure the exception is left set and makes the import fail
+    dbus_module = PyImport_ImportModule("dbus");
+    if (dbus_module == NULL)
+        return;
+
+    byte_array_type = PyObject_GetAttrString(dbus_module, "ByteArray");
+    Py_DECREF(dbus_module);
+}
diff --git a/src/olpc/datastore/metadatastore.py b/src/olpc/datastore/metadatastore.py
index 810ccb8..4f663a7 100644
--- a/src/olpc/datastore/metadatastore.py
+++ b/src/olpc/datastore/metadatastore.py
@@ -3,6 +3,7 @@ import logging
 import errno
 
 from olpc.datastore import layoutmanager
+from olpc.datastore import metadatareader
 
 MAX_SIZE = 256
 
@@ -26,30 +27,7 @@ class MetadataStore(object):
 
     def retrieve(self, uid, properties=None):
         dir_path = layoutmanager.get_instance().get_entry_path(uid)
-        if not os.path.exists(dir_path):
-            raise ValueError('Unknown object: %r' % uid)
-
-        metadata_path = os.path.join(dir_path, 'metadata')
-        metadata = {}
-        if properties is None or not properties:
-            properties = os.listdir(metadata_path)
-
-        for key in properties:
-            property_path = metadata_path + '/' + key
-            try:
-                value = open(property_path, 'r').read()
-            except IOError, e:
-                if e.errno != errno.ENOENT:
-                    raise
-            else:
-                if not value:
-                    metadata[key] = ''
-                else:
-                    # TODO: This class shouldn't know anything about dbus.
-                    import dbus
-                    metadata[key] = dbus.ByteArray(value)
-
-        return metadata
+        return metadatareader.retrieve(dir_path, properties)
 
     def delete(self, uid):
         dir_path = layoutmanager.get_instance().get_entry_path(uid)
@@ -58,14 +36,3 @@ class MetadataStore(object):
             os.remove(os.path.join(metadata_path, key))
         os.rmdir(metadata_path)
 
-    def _cast_for_journal(self, key, value):
-        # Hack because the current journal expects these properties to have some
-        # predefined types
-        if key in ['timestamp', 'keep']:
-            try:
-                return int(value)
-            except ValueError:
-                return value
-        else:
-            return value
-
diff --git a/tests/test_perf.py b/tests/test_perf.py
index 89996ac..d1e0269 100644
--- a/tests/test_perf.py
+++ b/tests/test_perf.py
@@ -66,7 +66,7 @@
 data_store = dbus.Interface(proxy, DS_DBUS_INTERFACE)
 
 uids = []
-n = 1000
+n = 100
 total = 0
 
 print 'Creating %r entries' % n