Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary · refs · log · tree · commit · diff · stats
path: root/src
diff options
context:
space:
mode:
author Tomeu Vizoso <tomeu@tomeuvizoso.net> 2008-09-26 15:48:39 (GMT)
committer Tomeu Vizoso <tomeu@tomeuvizoso.net> 2008-09-26 15:48:39 (GMT)
commit b905626986ac003db909ff28f7bcfd7fcd4eeca0 (patch)
tree 7f2f0d6283ce37194067a1eaab37191b43109b7d /src
parent 96564f6bb56079cc1eccd0230057f753fce7f6f4 (diff)
Implement metadata reading in C for speed
Diffstat (limited to 'src')
-rw-r--r-- src/olpc/datastore/Makefile.am 13
-rw-r--r-- src/olpc/datastore/layoutmanager.py 2
-rw-r--r-- src/olpc/datastore/metadatareader.c 199
-rw-r--r-- src/olpc/datastore/metadatastore.py 37
4 files changed, 215 insertions, 36 deletions
diff --git a/src/olpc/datastore/Makefile.am b/src/olpc/datastore/Makefile.am
index 78f2d03..43b6d14 100644
--- a/src/olpc/datastore/Makefile.am
+++ b/src/olpc/datastore/Makefile.am
@@ -6,3 +6,16 @@ datastore_PYTHON = \
indexstore.py \
layoutmanager.py \
metadatastore.py
+
+# Preprocessor/compiler flags used for the C sources listed in this file.
+AM_CPPFLAGS = \
+ $(WARN_CFLAGS) \
+ $(EXT_CFLAGS) \
+ $(PYTHON_INCLUDES)
+
+# libtool flags: build a loadable module, without a version suffix.
+AM_LDFLAGS = -module -avoid-version
+
+# Install the extension next to the olpc.datastore Python modules.
+pkgpyexecdir = $(pythondir)/olpc/datastore
+pkgpyexec_LTLIBRARIES = metadatareader.la
+
+# The metadatareader module is built from a single C source file.
+metadatareader_la_SOURCES = \
+ metadatareader.c
diff --git a/src/olpc/datastore/layoutmanager.py b/src/olpc/datastore/layoutmanager.py
index db91690..a01139e 100644
--- a/src/olpc/datastore/layoutmanager.py
+++ b/src/olpc/datastore/layoutmanager.py
@@ -21,7 +21,7 @@ class LayoutManager(object):
os.makedirs(path)
def get_entry_path(self, uid):
- return os.path.join(self._root_path, uid[:2], uid)
+ return '%s/%s/%s' % (self._root_path, uid[:2], uid)
def get_index_path(self):
return os.path.join(self._root_path, 'index')
diff --git a/src/olpc/datastore/metadatareader.c b/src/olpc/datastore/metadatareader.c
new file mode 100644
index 0000000..ce6d38e
--- /dev/null
+++ b/src/olpc/datastore/metadatareader.c
@@ -0,0 +1,199 @@
+#include "Python.h"
+
+#include <dirent.h>
+
+// TODO: put it in a place where python can use it when writing metadata
+// Largest property file, in bytes, that retrieve() accepts. The expression
+// is parenthesized so the macro expands safely inside larger expressions
+// (e.g. `x / MAX_PROPERTY_LENGTH`).
+#define MAX_PROPERTY_LENGTH (500 * 1024)
+
+// dbus.ByteArray, looked up once in initmetadatareader() and used by
+// metadatareader_retrieve() to wrap non-empty property values.
+static PyObject *byte_array_type = NULL;
+
+/*
+ * Decide whether the property file called `name` passes the `properties`
+ * filter.  `properties` must be Py_None or a list (checked by the caller).
+ *
+ * Returns 1 when the property should be loaded (no filter, empty filter, or
+ * `name` is listed), 0 when it should be skipped, and -1 with a Python
+ * exception set when a list item cannot be read as a string.
+ */
+static int
+property_is_requested(PyObject *properties, const char *name)
+{
+    Py_ssize_t count;
+    Py_ssize_t i;
+
+    if (properties == Py_None)
+        return 1;
+
+    count = PyList_Size(properties);
+    if (count < 0)
+        return -1;
+    if (count == 0)
+        return 1;
+
+    for (i = 0; i < count; i++) {
+        /* Borrowed reference: must not be released here. */
+        PyObject *property = PyList_GetItem(properties, i);
+        const char *wanted;
+
+        if (property == NULL)
+            return -1;
+        wanted = PyString_AsString(property);
+        if (wanted == NULL)
+            return -1;
+        if (strcmp(name, wanted) == 0)
+            return 1;
+    }
+    return 0;
+}
+
+/*
+ * retrieve(dir_path, properties) -> dict
+ *
+ * Reads the property files found in "<dir_path>/metadata" and returns a new
+ * dictionary keyed by file name.  An empty file yields the empty string; any
+ * other file yields dbus.ByteArray(contents).  When `properties` is a
+ * non-empty list, only the files named in it are read; None or an empty
+ * list means every file is read.
+ *
+ * Raises:
+ *   TypeError   - `properties` is neither None nor a list, or holds an item
+ *                 that is not a string.
+ *   IOError     - the metadata directory or a property file cannot be
+ *                 opened, sized or fully read.
+ *   ValueError  - a property file is larger than MAX_PROPERTY_LENGTH, or a
+ *                 value cannot be converted or stored.
+ *   MemoryError - an allocation failed.
+ */
+static PyObject *
+metadatareader_retrieve(PyObject *unused, PyObject *args)
+{
+    PyObject *dict = NULL;
+    PyObject *properties = NULL;
+    PyObject *value = NULL;
+    const char *dir_path = NULL;
+    char *metadata_path = NULL;
+    DIR *dir_stream = NULL;
+    struct dirent *dir_entry = NULL;
+    char *file_path = NULL;
+    FILE *file = NULL;
+    char *value_buf = NULL;
+    size_t metadata_path_size;
+
+    if (!PyArg_ParseTuple(args, "sO:retrieve", &dir_path, &properties))
+        return NULL;
+
+    // Reject anything PyList_Size() cannot handle before touching the disk
+    if (properties != Py_None && !PyList_Check(properties)) {
+        PyErr_SetString(PyExc_TypeError, "properties must be a list or None");
+        return NULL;
+    }
+
+    // Build path to the metadata directory; sizeof counts the trailing NUL
+    metadata_path_size = strlen(dir_path) + sizeof("/metadata");
+    metadata_path = PyMem_Malloc(metadata_path_size);
+    if (metadata_path == NULL) {
+        PyErr_NoMemory();
+        goto cleanup;
+    }
+    snprintf(metadata_path, metadata_path_size, "%s/%s", dir_path, "metadata");
+
+    dir_stream = opendir(metadata_path);
+    if (dir_stream == NULL) {
+        char buf[256];
+        snprintf(buf, sizeof(buf), "Couldn't open metadata directory %s",
+                 metadata_path);
+        PyErr_SetString(PyExc_IOError, buf);
+        goto cleanup;
+    }
+
+    dict = PyDict_New();
+    if (dict == NULL)
+        goto cleanup;
+
+    while ((dir_entry = readdir(dir_stream)) != NULL) {
+        const char *name = dir_entry->d_name;
+        long file_size;
+        size_t file_path_size;
+        int requested;
+
+        // Skip . and ..
+        if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+            continue;
+
+        // Check if the property is in the properties list
+        requested = property_is_requested(properties, name);
+        if (requested < 0)
+            goto cleanup;
+        if (!requested)
+            continue;
+
+        // Build path of the property file: dir + '/' + name + NUL
+        file_path_size = strlen(metadata_path) + 1 + strlen(name) + 1;
+        file_path = PyMem_Malloc(file_path_size);
+        if (file_path == NULL) {
+            PyErr_NoMemory();
+            goto cleanup;
+        }
+        snprintf(file_path, file_path_size, "%s/%s", metadata_path, name);
+
+        file = fopen(file_path, "r");
+        if (file == NULL) {
+            char buf[256];
+            snprintf(buf, sizeof(buf), "Cannot open property file %s: %s",
+                     file_path, strerror(errno));
+            PyErr_SetString(PyExc_IOError, buf);
+            goto cleanup;
+        }
+
+        // Get file size; a failed seek/tell must not reach the allocator
+        if (fseek(file, 0, SEEK_END) != 0 || (file_size = ftell(file)) < 0) {
+            char buf[256];
+            snprintf(buf, sizeof(buf),
+                     "Cannot get size of property file %s: %s",
+                     file_path, strerror(errno));
+            PyErr_SetString(PyExc_IOError, buf);
+            goto cleanup;
+        }
+        rewind(file);
+
+        if (file_size == 0) {
+            // Empty property
+            value = PyString_FromString("");
+            if (value == NULL) {
+                PyErr_SetString(PyExc_ValueError,
+                                "Failed to convert value to python string");
+                goto cleanup;
+            }
+        } else {
+            PyObject *ctor_args;
+            size_t read_size;
+
+            if (file_size > MAX_PROPERTY_LENGTH) {
+                PyErr_SetString(PyExc_ValueError, "Property file too big");
+                goto cleanup;
+            }
+
+            // Read the whole file
+            value_buf = PyMem_Malloc((size_t) file_size);
+            if (value_buf == NULL) {
+                PyErr_NoMemory();
+                goto cleanup;
+            }
+            read_size = fread(value_buf, 1, (size_t) file_size, file);
+            if (read_size < (size_t) file_size) {
+                char buf[256];
+                snprintf(buf, sizeof(buf),
+                         "Error while reading property file %s", file_path);
+                PyErr_SetString(PyExc_IOError, buf);
+                goto cleanup;
+            }
+
+            // Convert value to dbus.ByteArray. Without PY_SSIZE_T_CLEAN the
+            // "s#" length is read as an int; the size check above guarantees
+            // that the cast cannot truncate.
+            ctor_args = Py_BuildValue("(s#)", value_buf, (int) file_size);
+            if (ctor_args == NULL)
+                goto cleanup;
+            value = PyObject_CallObject(byte_array_type, ctor_args);
+            Py_DECREF(ctor_args);
+            if (value == NULL) {
+                PyErr_SetString(PyExc_ValueError,
+                                "Failed to convert value to dbus.ByteArray");
+                goto cleanup;
+            }
+        }
+
+        // Add property to the metadata dict
+        if (PyDict_SetItemString(dict, name, value) == -1) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Failed to add property to dictionary");
+            goto cleanup;
+        }
+        // The dict took its own reference; drop ours
+        Py_DECREF(value);
+        value = NULL;
+
+        // Release the per-property resources before the next entry
+        PyMem_Free(file_path);
+        file_path = NULL;
+        fclose(file);
+        file = NULL;
+        if (value_buf) {
+            PyMem_Free(value_buf);
+            value_buf = NULL;
+        }
+    }
+
+    closedir(dir_stream);
+    PyMem_Free(metadata_path);
+
+    return dict;
+
+cleanup:
+    // Reached only with a Python exception set; release whatever is held
+    if (file_path) {
+        PyMem_Free(file_path);
+    }
+    if (value_buf) {
+        PyMem_Free(value_buf);
+    }
+    if (metadata_path) {
+        PyMem_Free(metadata_path);
+    }
+    Py_XDECREF(value);
+    Py_XDECREF(dict);
+    if (file) {
+        fclose(file);
+    }
+    if (dir_stream) {
+        closedir(dir_stream);
+    }
+    return NULL;
+}
+
+/* Functions exported by the module; the all-NULL entry ends the table. */
+static PyMethodDef metadatareader_functions[] = {
+    {
+        "retrieve",
+        metadatareader_retrieve,
+        METH_VARARGS,
+        PyDoc_STR("Read a dictionary from a file")
+    },
+    { NULL, NULL, 0, NULL }
+};
+
+/*
+ * Module initialisation: registers the "metadatareader" module and caches
+ * dbus.ByteArray in byte_array_type.  On any failure the function returns
+ * with the Python exception left set, so the import fails instead of
+ * dereferencing a NULL pointer.
+ */
+PyMODINIT_FUNC
+initmetadatareader(void)
+{
+    PyObject *mod;
+    PyObject *dbus_module;
+
+    mod = Py_InitModule("metadatareader", metadatareader_functions);
+    if (mod == NULL)
+        return;
+
+    dbus_module = PyImport_ImportModule("dbus");
+    if (dbus_module == NULL)
+        return;
+
+    // New reference, kept for the lifetime of the module; NULL (with an
+    // exception set) if dbus has no ByteArray attribute.
+    byte_array_type = PyObject_GetAttrString(dbus_module, "ByteArray");
+
+    // Only the attribute is kept, so release our reference to the module
+    Py_DECREF(dbus_module);
+}
+
diff --git a/src/olpc/datastore/metadatastore.py b/src/olpc/datastore/metadatastore.py
index 810ccb8..4f663a7 100644
--- a/src/olpc/datastore/metadatastore.py
+++ b/src/olpc/datastore/metadatastore.py
@@ -3,6 +3,7 @@ import logging
import errno
from olpc.datastore import layoutmanager
+from olpc.datastore import metadatareader
MAX_SIZE = 256
@@ -26,30 +27,7 @@ class MetadataStore(object):
def retrieve(self, uid, properties=None):
dir_path = layoutmanager.get_instance().get_entry_path(uid)
- if not os.path.exists(dir_path):
- raise ValueError('Unknown object: %r' % uid)
-
- metadata_path = os.path.join(dir_path, 'metadata')
- metadata = {}
- if properties is None or not properties:
- properties = os.listdir(metadata_path)
-
- for key in properties:
- property_path = metadata_path + '/' + key
- try:
- value = open(property_path, 'r').read()
- except IOError, e:
- if e.errno != errno.ENOENT:
- raise
- else:
- if not value:
- metadata[key] = ''
- else:
- # TODO: This class shouldn't know anything about dbus.
- import dbus
- metadata[key] = dbus.ByteArray(value)
-
- return metadata
+ return metadatareader.retrieve(dir_path, properties)
def delete(self, uid):
dir_path = layoutmanager.get_instance().get_entry_path(uid)
@@ -58,14 +36,3 @@ class MetadataStore(object):
os.remove(os.path.join(metadata_path, key))
os.rmdir(metadata_path)
- def _cast_for_journal(self, key, value):
- # Hack because the current journal expects these properties to have some
- # predefined types
- if key in ['timestamp', 'keep']:
- try:
- return int(value)
- except ValueError:
- return value
- else:
- return value
-