diff options
| | | |
---|---|---|
author | Tomeu Vizoso <tomeu@tomeuvizoso.net> | 2008-09-26 15:48:39 (GMT) |
committer | Tomeu Vizoso <tomeu@tomeuvizoso.net> | 2008-09-26 15:48:39 (GMT) |
commit | b905626986ac003db909ff28f7bcfd7fcd4eeca0 (patch) | |
tree | 7f2f0d6283ce37194067a1eaab37191b43109b7d /src | |
parent | 96564f6bb56079cc1eccd0230057f753fce7f6f4 (diff) |

Implement metadata reading in C for speed
Diffstat (limited to 'src')

| | | |
|---|---|---|
| -rw-r--r-- | src/olpc/datastore/Makefile.am | 13 |
| -rw-r--r-- | src/olpc/datastore/layoutmanager.py | 2 |
| -rw-r--r-- | src/olpc/datastore/metadatareader.c | 199 |
| -rw-r--r-- | src/olpc/datastore/metadatastore.py | 37 |

4 files changed, 215 insertions, 36 deletions
diff --git a/src/olpc/datastore/Makefile.am b/src/olpc/datastore/Makefile.am index 78f2d03..43b6d14 100644 --- a/src/olpc/datastore/Makefile.am +++ b/src/olpc/datastore/Makefile.am @@ -6,3 +6,16 @@ datastore_PYTHON = \ indexstore.py \ layoutmanager.py \ metadatastore.py + +AM_CPPFLAGS = \ + $(WARN_CFLAGS) \ + $(EXT_CFLAGS) \ + $(PYTHON_INCLUDES) + +AM_LDFLAGS = -module -avoid-version + +pkgpyexecdir = $(pythondir)/olpc/datastore +pkgpyexec_LTLIBRARIES = metadatareader.la + +metadatareader_la_SOURCES = \ + metadatareader.c diff --git a/src/olpc/datastore/layoutmanager.py b/src/olpc/datastore/layoutmanager.py index db91690..a01139e 100644 --- a/src/olpc/datastore/layoutmanager.py +++ b/src/olpc/datastore/layoutmanager.py @@ -21,7 +21,7 @@ class LayoutManager(object): os.makedirs(path) def get_entry_path(self, uid): - return os.path.join(self._root_path, uid[:2], uid) + return '%s/%s/%s' % (self._root_path, uid[:2], uid) def get_index_path(self): return os.path.join(self._root_path, 'index') diff --git a/src/olpc/datastore/metadatareader.c b/src/olpc/datastore/metadatareader.c new file mode 100644 index 0000000..ce6d38e --- /dev/null +++ b/src/olpc/datastore/metadatareader.c @@ -0,0 +1,199 @@ +#include "Python.h" + +#include <dirent.h> + +// TODO: put it in a place where python can use it when writing metadata +#define MAX_PROPERTY_LENGTH 500 * 1024 + +static PyObject *byte_array_type = NULL; + +static PyObject * +metadatareader_retrieve(PyObject *unused, PyObject *args) +{ + PyObject *dict = NULL; + PyObject *properties = NULL; + const char *dir_path = NULL; + char *metadata_path = NULL; + DIR *dir_stream = NULL; + struct dirent *dir_entry = NULL; + char *file_path = NULL; + FILE *file = NULL; + char *value_buf = NULL; + + if (!PyArg_ParseTuple(args, "sO:retrieve", &dir_path, &properties)) + return NULL; + + // Build path to the metadata directory + int metadata_path_size = strlen(dir_path) + 10; + metadata_path = PyMem_Malloc(metadata_path_size); + if (metadata_path 
== NULL) { + PyErr_NoMemory(); + goto cleanup; + } + snprintf (metadata_path, metadata_path_size, "%s/%s", dir_path, "metadata"); + + dir_stream = opendir (metadata_path); + if (dir_stream == NULL) { + char buf[256]; + snprintf(buf, sizeof(buf), "Couldn't open metadata directory %s", + metadata_path); + PyErr_SetString(PyExc_IOError, buf); + goto cleanup; + } + + dict = PyDict_New(); + + dir_entry = readdir(dir_stream); + while (dir_entry != NULL) { + long file_size; + int file_path_size; + PyObject *value = NULL; + + // Skip . and .. + if (dir_entry->d_name[0] == '.' && + (strlen(dir_entry->d_name) == 1 || + (dir_entry->d_name[1] == '.' && + strlen(dir_entry->d_name) == 2))) + goto next_property; + + // Check if the property is in the properties list + if ((properties != Py_None) && (PyList_Size(properties) > 0)) { + int found = 0; + int i; + for (i = 0; i < PyList_Size(properties); i++) { + PyObject *property = PyList_GetItem(properties, i); + if (!strcmp (dir_entry->d_name, PyString_AsString (property))) { + found = 1; + } + } + if (!found) { + goto next_property; + } + } + + // Build path of the property file + file_path_size = strlen(metadata_path) + 1 + strlen(dir_entry->d_name) + + 1; + file_path = PyMem_Malloc(file_path_size); + if (file_path == NULL) { + PyErr_NoMemory(); + goto cleanup; + } + snprintf (file_path, file_path_size, "%s/%s", metadata_path, + dir_entry->d_name); + + file = fopen(file_path, "r"); + if (file == NULL) { + char buf[256]; + snprintf(buf, sizeof(buf), "Cannot open property file %s: %s", + file_path, strerror(errno)); + PyErr_SetString(PyExc_IOError, buf); + goto cleanup; + } + + // Get file size + fseek (file, 0, SEEK_END); + file_size = ftell (file); + rewind (file); + + if (file_size == 0) { + // Empty property + value = PyString_FromString(""); + if (value == NULL) { + PyErr_SetString(PyExc_ValueError, + "Failed to convert value to python string"); + goto cleanup; + } + } else { + if (file_size > MAX_PROPERTY_LENGTH) { + 
PyErr_SetString(PyExc_ValueError, "Property file too big"); + goto cleanup; + } + + // Read the whole file + value_buf = PyMem_Malloc(file_size); + if (value_buf == NULL) { + PyErr_NoMemory(); + goto cleanup; + } + long read_size = fread(value_buf, 1, file_size, file); + if (read_size < file_size) { + char buf[256]; + snprintf(buf, sizeof(buf), + "Error while reading property file %s", file_path); + PyErr_SetString(PyExc_IOError, buf); + goto cleanup; + } + + // Convert value to dbus.ByteArray + PyObject *args = Py_BuildValue("(s#)", value_buf, file_size); + value = PyObject_CallObject(byte_array_type, args); + if (value == NULL) { + PyErr_SetString(PyExc_ValueError, + "Failed to convert value to dbus.ByteArray"); + goto cleanup; + } + } + + // Add property to the metadata dict + if (PyDict_SetItemString(dict, dir_entry->d_name, value) == -1) { + PyErr_SetString(PyExc_ValueError, + "Failed to add property to dictionary"); + goto cleanup; + } + + next_property: + if (file_path) { + PyMem_Free(file_path); + file_path = NULL; + } + if (file) { + fclose(file); + file = NULL; + } + if (value_buf) { + PyMem_Free(value_buf); + value_buf = NULL; + } + + dir_entry = readdir(dir_stream); + } + + closedir(dir_stream); + + return dict; + +cleanup: + if (file_path) { + PyMem_Free(file_path); + } + if (value_buf) { + PyMem_Free(value_buf); + } + if (dict) { + Py_DECREF(dict); + } + if (file) { + fclose(file); + } + if (dir_stream) { + closedir(dir_stream); + } + return NULL; +} + +static PyMethodDef metadatareader_functions[] = { + {"retrieve", metadatareader_retrieve, METH_VARARGS, PyDoc_STR("Read a dictionary from a file")}, + {NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC +initmetadatareader(void) +{ + PyObject* mod; + mod = Py_InitModule("metadatareader", metadatareader_functions); + + PyObject *dbus_module = PyImport_ImportModule("dbus"); + byte_array_type = PyObject_GetAttrString(dbus_module, "ByteArray"); +} + diff --git a/src/olpc/datastore/metadatastore.py 
b/src/olpc/datastore/metadatastore.py index 810ccb8..4f663a7 100644 --- a/src/olpc/datastore/metadatastore.py +++ b/src/olpc/datastore/metadatastore.py @@ -3,6 +3,7 @@ import logging import errno from olpc.datastore import layoutmanager +from olpc.datastore import metadatareader MAX_SIZE = 256 @@ -26,30 +27,7 @@ class MetadataStore(object): def retrieve(self, uid, properties=None): dir_path = layoutmanager.get_instance().get_entry_path(uid) - if not os.path.exists(dir_path): - raise ValueError('Unknown object: %r' % uid) - - metadata_path = os.path.join(dir_path, 'metadata') - metadata = {} - if properties is None or not properties: - properties = os.listdir(metadata_path) - - for key in properties: - property_path = metadata_path + '/' + key - try: - value = open(property_path, 'r').read() - except IOError, e: - if e.errno != errno.ENOENT: - raise - else: - if not value: - metadata[key] = '' - else: - # TODO: This class shouldn't know anything about dbus. - import dbus - metadata[key] = dbus.ByteArray(value) - - return metadata + return metadatareader.retrieve(dir_path, properties) def delete(self, uid): dir_path = layoutmanager.get_instance().get_entry_path(uid) @@ -58,14 +36,3 @@ class MetadataStore(object): os.remove(os.path.join(metadata_path, key)) os.rmdir(metadata_path) - def _cast_for_journal(self, key, value): - # Hack because the current journal expects these properties to have some - # predefined types - if key in ['timestamp', 'keep']: - try: - return int(value) - except ValueError: - return value - else: - return value - |