From 192ad89c9ade5dd9c38cfe3280666f3a2c7c17d3 Mon Sep 17 00:00:00 2001 From: Marco Pesenti Gritti Date: Sun, 22 Aug 2010 02:45:43 +0000 Subject: Merge datastore module in a subdirectory --- diff --git a/datastore/.gitignore b/datastore/.gitignore new file mode 100644 index 0000000..12965e2 --- /dev/null +++ b/datastore/.gitignore @@ -0,0 +1,35 @@ +# Generic + +*.pyc +*~ +*.deps +*.libs +*.la +*.lo +*.loT +*.service + +# Absolute +Makefile +Makefile.in +aclocal.m4 +autom4te.cache +compile +config.guess +config.log +config.status +config.sub +configure +depcomp +install-sh +libtool +ltmain.sh +missing +mkinstalldirs +py-compile +stamp-h1 +m4/libtool.m4 +m4/ltoptions.m4 +m4/ltsugar.m4 +m4/ltversion.m4 +m4/lt~obsolete.m4 diff --git a/datastore/AUTHORS b/datastore/AUTHORS new file mode 100644 index 0000000..a2c2720 --- /dev/null +++ b/datastore/AUTHORS @@ -0,0 +1,24 @@ +Contributors +============ + +Aleksey Lim +Benjamin Saller +Bernie Innocenti +Bert Freudenberg +Dan Williams +Marco Pesenti Gritti +Sascha Silbe +Simon McVittie +Simon Schampijer +Tomeu Vizoso +Wade Brainerd + +Past maintainers +================ + +Tomeu Vizoso + +Current maintainers +=================== + +Aleksey Lim diff --git a/datastore/COPYING b/datastore/COPYING new file mode 100644 index 0000000..ba9543b --- /dev/null +++ b/datastore/COPYING @@ -0,0 +1,222 @@ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS diff --git a/datastore/Makefile.am b/datastore/Makefile.am new file mode 100644 index 0000000..bfebefe --- /dev/null +++ b/datastore/Makefile.am @@ -0,0 +1,7 @@ +ACLOCAL_AMFLAGS = -I m4 + +SUBDIRS = bin etc src + +test: + @cd tests + $(MAKE) -C tests test diff --git a/datastore/NEWS b/datastore/NEWS new file mode 100644 index 0000000..0a765be --- /dev/null +++ b/datastore/NEWS @@ -0,0 +1,41 @@ +v0.89.2 2010-08-04 +* sl#2132: reduce _FLUSH_TIMEOUT to 5 seconds +* Set index_updated flag on ds shutting down #2095 + +v0.89.1 2010-07-14 +* Invert index_updated logic #2095 + +v0.88.0 2010-03-29 +* fix migration from 0.82 data store (#1838) + +v0.87.4 2010-03-10 +* Do not remove internal metadata fields while rewriting object #1813 +* Auto detect mime type in copy-to-journal #586 (tomeu) + +v0.87.3 2010-03-08 +* rebuild index on migration #1787 + +v0.87.2 2009-12-21 +* copy-to-journal dbus error #573 +* file descriptor leak in filestore.retrieve() #1550 +* range queries use lexical comparison, not numerical #1342 +* add and use type information for Xapian value-stored properties #1437 + +v0.87.1 2009-12-01 +* Make reading version file more robust (sayamindu) #1562 +* Use gobject_timeout_add_seconds to make power usage more efficient (sayamindu) #1567 + +v0.86.2 2010-03-08 +* rebuild index on migration #1787 + +v0.86.1 2009-10-01 +* Screenshot file is not deleted #1445 + +v0.86.0 2009-09-21 +* fix 'error return without exception set' on ARM (#1404) +* eliminate incorrect warning about unknown search terms (#1363) +* Fix AUTHORS file +* Add AM_MAINTAINER_MODE macro to configure.ac + +v0.85.4 2009-09-17 +* Use basename for uploaded files by default in copy-to-journal script #1372 diff --git a/datastore/README b/datastore/README new file mode 100644 index 0000000..b29cebb --- /dev/null +++ b/datastore/README @@ -0,0 +1,41 @@ +About +===== + +Provides activities with a way to store data and metadata and the journal +with means for querying, including full text search. 
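+A minimal client-side usage sketch (illustrative only; these are the same
+sugar.datastore calls made by the bin/copy-to-journal and
+bin/copy-from-journal scripts added below):
+
+    from sugar.datastore import datastore
+
+    # Store a file together with its metadata.
+    entry = datastore.create()
+    entry.set_file_path('/tmp/example.txt')
+    entry.metadata['title'] = 'example'
+    entry.metadata['mime_type'] = 'text/plain'
+    datastore.write(entry)
+    entry.destroy()
+
+    # Full text search over the stored metadata.
+    results, count = datastore.find({'query': 'example'})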
+ + +Resources +========= + +Code Repository + http://git.sugarlabs.org/projects/sugar-datastore/ + +Bug Tracking + http://dev.sugarlabs.org/ + +Home Page + http://wiki.sugarlabs.org/ + + +Storage format history +====================== + +0 0.82.x + Initial format + +1 0.84.x + Refactoring, start using indexes + +2 0.86.0, 0.86.1 + Add sorting by title and mtime + +3 not-mainstream + test versioning support + +4 0.86.2, 0.88.x + version bump to force index rebuild that may have been missed during the + migration to version 2 (SL#1787) + +5 not-mainstream + test versioning support (version bump for SL#1787) diff --git a/datastore/autogen.sh b/datastore/autogen.sh new file mode 100755 index 0000000..1cd5db4 --- /dev/null +++ b/datastore/autogen.sh @@ -0,0 +1,5 @@ +#!/bin/sh +export ACLOCAL="aclocal -I m4" + +autoreconf -i +./configure "$@" diff --git a/datastore/bin/Makefile.am b/datastore/bin/Makefile.am new file mode 100644 index 0000000..c583cbe --- /dev/null +++ b/datastore/bin/Makefile.am @@ -0,0 +1,6 @@ +bin_SCRIPTS = \ + datastore-service \ + copy-from-journal \ + copy-to-journal + +EXTRA_DIST = $(bin_SCRIPTS) diff --git a/datastore/bin/copy-from-journal b/datastore/bin/copy-from-journal new file mode 100755 index 0000000..7a10bfd --- /dev/null +++ b/datastore/bin/copy-from-journal @@ -0,0 +1,121 @@ +#!/usr/bin/env python +# +# Simple script to export a file from the datastore +# Reinier Heeres, , 2007-12-24 +# Phil Bordelon + +import sys +import os +import shutil +import optparse +import dbus + +if os.path.exists("/tmp/olpc-session-bus"): + os.environ["DBUS_SESSION_BUS_ADDRESS"] = "unix:path=/tmp/olpc-session-bus" + +from sugar.datastore import datastore +import sugar.mime + +# Limit the number of objects returned on an ambiguous query to this number, +# for quicker operation. +RETURN_LIMIT = 2 + +def build_option_parser(): + + usage = "Usage: %prog [-o OBJECT_ID] [-q SEARCH_STR] [-m] OUTFILE" + parser = optparse.OptionParser(usage=usage) + + parser.add_option("-o", "--object_id", action="store", dest="object_id", + help="Retrieve object with explicit ID OBJECT_ID", + metavar="OBJECT_ID", default=None) + + parser.add_option("-q", "--query", action="store", dest="query", + help="Full-text-search the metadata for SEARCH_STR", + metavar="SEARCH_STR", default=None) + + parser.add_option("-m", "--metadata", action="store_true", dest="show_meta", + help="Show all non-preview metadata [default: hide]", + default=False) + + return parser + +if __name__ == "__main__": + + parser = build_option_parser() + options, args = parser.parse_args() + if len(args) < 1: + parser.print_help() + exit(0) + + try: + dsentry = None + + # Get object directly if we were given an explicit object ID. + if options.object_id is not None: + dsentry = datastore.get(options.object_id) + + # Compose the query based on the options provided. + if dsentry is None: + query = {} + + if options.query is not None: + query['query'] = options.query + + # We only want a single file at a time; limit the number of objects + # returned to two, as anything more than one means the criteria were + # not limited enough. + objects, count = datastore.find(query, limit=RETURN_LIMIT, sorting='-mtime') + if count > 1: + print 'WARNING: %d objects found; retrieving most recent.' % (count) + for i in xrange(1, RETURN_LIMIT): + objects[i].destroy() + + if count > 0: + dsentry = objects[0] + + # If neither an explicit object ID nor a query gave us data, fail. + if dsentry is None: + print 'ERROR: unable to determine journal object to copy.' 
+ parser.print_help() + exit(0) + + # Print metadata if that is what the user asked for. + if options.show_meta: + print 'Metadata:' + for key, val in dsentry.metadata.get_dictionary().iteritems(): + if key != 'preview': + print '%20s -> %s' % (key, val) + + # If no file is associated with this object, we can't save it out. + if dsentry.get_file_path() == "": + print 'ERROR: no file associated with object, just metadata.' + dsentry.destroy() + exit(0) + + outname = args[0] + outroot, outext = os.path.splitext(outname) + + # Do our best to determine the output file extension, based on Sugar's + # MIME-type-to-extension mappings. + if outext == "": + mimetype = dsentry.metadata['mime_type'] + outext = sugar.mime.get_primary_extension(mimetype) + if outext == None: + outext = "dsobject" + outext = '.' + outext + + # Lastly, actually copy the file out of the datastore and onto the + # filesystem. + shutil.copyfile(dsentry.get_file_path(), outroot + outext) + print '%s -> %s' % (dsentry.get_file_path(), outroot + outext) + + # Cleanup. + dsentry.destroy() + + except dbus.DBusException: + print 'ERROR: Unable to connect to the datastore.\n'\ + 'Check that you are running in the same environment as the '\ + 'datastore service.' + + except Exception, e: + print 'ERROR: %s' % (e) diff --git a/datastore/bin/copy-to-journal b/datastore/bin/copy-to-journal new file mode 100755 index 0000000..ca6f872 --- /dev/null +++ b/datastore/bin/copy-to-journal @@ -0,0 +1,97 @@ +#!/usr/bin/python +# +# Simple script to import a file to the datastore +# Reinier Heeres, , 2007-12-20 +# +# Modified by Phil Bordelon 2007-12-20, 2007-12-21 +# to support adding metadata. Note that the MIME-type is required, +# as otherwise the datastore will not accept the file. + +import sys +import os +import optparse +from gettext import gettext as _ +import dbus + +if os.path.exists("/tmp/olpc-session-bus"): + os.environ["DBUS_SESSION_BUS_ADDRESS"] = "unix:path=/tmp/olpc-session-bus" + +from sugar.datastore import datastore +from sugar import mime + +def build_option_parser(): + + usage = "Usage: %prog [-m MIMETYPE] [-t TITLE] [-d DESC] [-T tag1 [-T tag2 ...]]" + parser = optparse.OptionParser(usage=usage) + + parser.add_option("-t", "--title", action="store", dest="title", + help="Set the title of the journal entry to TITLE", metavar="TITLE", + default=None) + parser.add_option("-d", "--description", action="store", + dest="description", metavar="DESC", + help="Set the description of the journal entry to DESC", + default=None) + parser.add_option("-m", "--mimetype", action="store", + dest="mimetype", metavar="MIMETYPE", + help="Set the file's MIME-type to MIMETYPE", + default=None) + parser.add_option("-T", "--tag", action="append", dest="tag_list", + help="Add tag TAG to the journal entry's tags; this option can be repeated", + metavar="TAG") + return parser + +if __name__ == "__main__": + + parser = build_option_parser() + options, args = parser.parse_args() + if len(args) < 1: + parser.print_help() + exit(0) + + fname = args[0] + absname = os.path.abspath(fname) + if not os.path.exists(absname): + print 'Error: File does not exist.' + parser.print_help() + exit(0) + + try: + entry = datastore.create() + entry.set_file_path(absname) + + # Set the mimetype to the provided one. + if options.mimetype is None: + entry.metadata['mime_type'] = mime.get_for_file(absname) + else: + entry.metadata['mime_type'] = options.mimetype + + # If no title is given, use the filename. 
+ if options.title: + entry.metadata['title'] = options.title + else: + entry.metadata['title'] = os.path.basename(fname) + + # Use the description given, otherwise leave it blank. + if options.description: + entry.metadata['description'] = options.description + else: + entry.metadata['description'] = _('From: %s') % fname + + # Lastly, if any tags are given, combine them into a single string + # and save them. + if options.tag_list: + tag_string = " ".join(options.tag_list) + entry.metadata['tags'] = tag_string + + datastore.write(entry) + print 'Created as %s' % (entry.object_id) + + entry.destroy() + + except dbus.DBusException: + print 'ERROR: Unable to connect to the datastore.\n'\ + 'Check that you are running in the same environment as the '\ + 'datastore service.' + + except Exception, e: + print 'ERROR: %s' % (e) diff --git a/datastore/bin/datastore-service b/datastore/bin/datastore-service new file mode 100755 index 0000000..06b6517 --- /dev/null +++ b/datastore/bin/datastore-service @@ -0,0 +1,58 @@ +#!/usr/bin/env python +import sys, os, signal, logging +import gobject +import dbus.service +import dbus.mainloop.glib +import dbus.glib +from carquinyol.datastore import DataStore +from sugar import logger + +# Path handling +profile = os.environ.get('SUGAR_PROFILE', 'default') +base_dir = os.path.join(os.path.expanduser('~'), '.sugar', profile) +log_dir = os.path.join(base_dir, "logs") +if not os.path.exists(log_dir): os.makedirs(log_dir) + +# setup logger +logger.start('datastore') + +# build the datastore +dbus.mainloop.glib.DBusGMainLoop(set_as_default=True) +bus = dbus.SessionBus() +connected = True + +ds = DataStore() + +# and run it +mainloop = gobject.MainLoop() + +def handle_disconnect(): + global mainloop + mainloop.quit() + logging.debug("Datastore disconnected from the bus.") + +def handle_shutdown(signum, frame): + global mainloop + mainloop.quit() + raise SystemExit("Shutting down on signal %s" % signum) + +bus.set_exit_on_disconnect(False) +bus.add_signal_receiver(handle_disconnect, + signal_name='Disconnected', + dbus_interface='org.freedesktop.DBus.Local') + +signal.signal(signal.SIGHUP, handle_shutdown) +signal.signal(signal.SIGTERM, handle_shutdown) + +def main(): + try: + mainloop.run() + except KeyboardInterrupt: + logging.info("DataStore shutdown by user") + except: + logging.error("Datastore shutdown with error", exc_info=sys.exc_info()) + +main() + +ds.stop() + diff --git a/datastore/configure.ac b/datastore/configure.ac new file mode 100644 index 0000000..6ccea2e --- /dev/null +++ b/datastore/configure.ac @@ -0,0 +1,24 @@ +AC_INIT([sugar-datastore],[0.89.2],[],[sugar-datastore]) + +AC_PREREQ([2.59]) + +AC_CONFIG_MACRO_DIR([m4]) +AC_CONFIG_SRCDIR([configure.ac]) + +AM_INIT_AUTOMAKE([1.9 foreign dist-bzip2 no-dist-gzip]) + +AM_MAINTAINER_MODE + +AC_DISABLE_STATIC +AC_PROG_LIBTOOL + +AM_PATH_PYTHON([2.5]) +AM_CHECK_PYTHON_HEADERS(,[AC_MSG_ERROR(could not find Python headers)]) + +AC_OUTPUT([ +Makefile +bin/Makefile +etc/Makefile +src/Makefile +src/carquinyol/Makefile +]) diff --git a/datastore/etc/.gitignore b/datastore/etc/.gitignore new file mode 100644 index 0000000..c0ede5e --- /dev/null +++ b/datastore/etc/.gitignore @@ -0,0 +1 @@ +org.laptop.sugar.DataStore.service diff --git a/datastore/etc/Makefile.am b/datastore/etc/Makefile.am new file mode 100644 index 0000000..a9b28b1 --- /dev/null +++ b/datastore/etc/Makefile.am @@ -0,0 +1,12 @@ +servicedir = $(datadir)/dbus-1/services +service_in_files = \ + org.laptop.sugar.DataStore.service.in + +service_DATA = 
$(service_in_files:.service.in=.service)
+
+org.laptop.sugar.DataStore.service: org.laptop.sugar.DataStore.service.in
+	@sed -e "s|\@bindir\@|$(bindir)|" $< > $@
+
+DISTCLEANFILES = $(service_DATA)
+
+EXTRA_DIST = $(service_in_files)
diff --git a/datastore/etc/org.laptop.sugar.DataStore.service.in b/datastore/etc/org.laptop.sugar.DataStore.service.in
new file mode 100644
index 0000000..1b5b270
--- /dev/null
+++ b/datastore/etc/org.laptop.sugar.DataStore.service.in
@@ -0,0 +1,3 @@
+[D-BUS Service]
+Name = org.laptop.sugar.DataStore
+Exec = @bindir@/datastore-service
diff --git a/datastore/m4/python.m4 b/datastore/m4/python.m4
new file mode 100644
index 0000000..e1c5266
--- /dev/null
+++ b/datastore/m4/python.m4
@@ -0,0 +1,62 @@
+## this one is commonly used with AM_PATH_PYTHONDIR ...
+dnl AM_CHECK_PYMOD(MODNAME [,SYMBOL [,ACTION-IF-FOUND [,ACTION-IF-NOT-FOUND]]])
+dnl Check if a module containing a given symbol is visible to python.
+AC_DEFUN([AM_CHECK_PYMOD],
+[AC_REQUIRE([AM_PATH_PYTHON])
+py_mod_var=`echo $1['_']$2 | sed 'y%./+-%__p_%'`
+AC_MSG_CHECKING(for ifelse([$2],[],,[$2 in ])python module $1)
+AC_CACHE_VAL(py_cv_mod_$py_mod_var, [
+ifelse([$2],[], [prog="
+import sys
+try:
+        import $1
+except ImportError:
+        sys.exit(1)
+except:
+        sys.exit(0)
+sys.exit(0)"], [prog="
+import $1
+$1.$2"])
+if $PYTHON -c "$prog" 1>&AC_FD_CC 2>&AC_FD_CC
+  then
+    eval "py_cv_mod_$py_mod_var=yes"
+  else
+    eval "py_cv_mod_$py_mod_var=no"
+  fi
+])
+py_val=`eval "echo \`echo '$py_cv_mod_'$py_mod_var\`"`
+if test "x$py_val" != xno; then
+  AC_MSG_RESULT(yes)
+  ifelse([$3], [],, [$3
+])dnl
+else
+  AC_MSG_RESULT(no)
+  ifelse([$4], [],, [$4
+])dnl
+fi
+])
+
+dnl a macro to check for ability to create python extensions
+dnl AM_CHECK_PYTHON_HEADERS([ACTION-IF-POSSIBLE], [ACTION-IF-NOT-POSSIBLE])
+dnl function also defines PYTHON_INCLUDES
+AC_DEFUN([AM_CHECK_PYTHON_HEADERS],
+[AC_REQUIRE([AM_PATH_PYTHON])
+AC_MSG_CHECKING(for headers required to compile python extensions)
+dnl deduce PYTHON_INCLUDES
+py_prefix=`$PYTHON -c "import sys; print sys.prefix"`
+py_exec_prefix=`$PYTHON -c "import sys; print sys.exec_prefix"`
+PYTHON_INCLUDES="-I${py_prefix}/include/python${PYTHON_VERSION}"
+if test "$py_prefix" != "$py_exec_prefix"; then
+  PYTHON_INCLUDES="$PYTHON_INCLUDES -I${py_exec_prefix}/include/python${PYTHON_VERSION}"
+fi
+AC_SUBST(PYTHON_INCLUDES)
+dnl check if the headers exist:
+save_CPPFLAGS="$CPPFLAGS"
+CPPFLAGS="$CPPFLAGS $PYTHON_INCLUDES"
+AC_TRY_CPP([#include <Python.h>],dnl
+[AC_MSG_RESULT(found)
+$1],dnl
+[AC_MSG_RESULT(not found)
+$2])
+CPPFLAGS="$save_CPPFLAGS"
+])
diff --git a/datastore/maint-helper.py b/datastore/maint-helper.py
new file mode 100755
index 0000000..5ffd7e0
--- /dev/null
+++ b/datastore/maint-helper.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2007, Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import os +import sys +import re +import datetime +import subprocess + + +SOURCE_EXTS = ['.py', '.c', '.h', '.cpp'] +COPYRIGHT = 'Copyright (C) ' + + +def is_source(path): + for ext in SOURCE_EXTS: + if path.endswith(ext): + return True + + +def get_name_and_version(): + f = open('configure.ac', 'r') + config = f.read() + f.close() + + exp = 'AC_INIT\(\[[^\]]+\],\[([^\]]+)\],\[\],\[([^\]]+)\]' + match = re.search(exp, config) + if not match: + print 'Cannot find the package name and version.' + sys.exit(0) + + return (match.group(2), match.group(1)) + + +def cmd_help(): + print 'Usage: \n\ +maint-helper.py build-snapshot - build a source snapshot \n\ +maint-helper.py fix-copyright [path] - fix the copyright year \n\ +maint-helper.py check-licenses - check licenses in the source' + + +def cmd_build_snapshot(): + name, version = get_name_and_version() + + print 'Update git...' + + retcode = subprocess.call(['git', 'pull']) + if retcode: + print 'ERROR - cannot pull from git' + + cmd = 'git-show-ref --hash=10 refs/heads/master' + alphatag = os.popen(cmd).readline().strip() + + tarball = '%s-%s-git%s.tar.bz2' % (name, version, alphatag) + + print 'Build %s...' % tarball + + os.spawnlp(os.P_WAIT, 'make', 'make', 'distcheck') + + os.rename('%s-%s.tar.bz2' % (name, version), tarball) + + print 'Update NEWS.sugar...' + + if 'SUGAR_NEWS' in os.environ: + sugar_news_path = os.environ['SUGAR_NEWS'] + if os.path.isfile(sugar_news_path): + f = open(sugar_news_path, 'r') + sugar_news = f.read() + f.close() + else: + sugar_news = '' + + name, version = get_name_and_version() + sugar_news += '%s - %s - %s\n\n' % (name, version, alphatag) + + f = open('NEWS', 'r') + for line in f.readlines(): + if len(line.strip()) > 0: + sugar_news += line + else: + break + f.close() + + f = open(sugar_news_path, 'w') + f.write(sugar_news) + f.close() + + print 'Update NEWS...' + + f = open('NEWS', 'r') + news = f.read() + f.close() + + news = 'Snapshot %s\n\n' % alphatag + news + + f = open('NEWS', 'w') + f.write(news) + f.close() + + print 'Committing to git...' + + changelog = 'Snapshot %s.' % alphatag + retcode = subprocess.call(['git', 'commit', '-a', '-m % s' % changelog]) + if retcode: + print 'ERROR - cannot commit to git' + + retcode = subprocess.call(['git', 'push']) + if retcode: + print 'ERROR - cannot push to git' + + print 'Done.' + + +def check_licenses(path, license, missing): + matchers = {'LGPL': 'GNU Lesser General Public', + 'GPL': 'GNU General Public License'} + + license_file = os.path.join(path, '.license') + if os.path.isfile(license_file): + f = open(license_file, 'r') + license = f.readline().strip() + f.close() + + for item in os.listdir(path): + full_path = os.path.join(path, item) + + if os.path.isdir(full_path): + check_licenses(full_path, license, missing) + else: + check_source = is_source(item) + + # Special cases. + if item.find('marshal') > 0 or \ + item.startswith('egg') > 0: + check_source = False + + if check_source: + f = open(full_path, 'r') + source = f.read() + f.close() + + miss_license = True + if source.find(matchers[license]) > 0: + miss_license = False + + # Special cases. 
+ if source.find('THIS FILE IS GENERATED') > 0: + miss_license = False + + if miss_license: + if license not in missing: + missing[license] = [] + missing[license].append(full_path) + + +def cmd_check_licenses(): + missing = {} + check_licenses(os.getcwd(), 'GPL', missing) + + for item in missing.keys(): + print '%s:\n' % item + for path in missing[item]: + print path + print '\n' + + +def fix_copyright(path): + for item in os.listdir(path): + full_path = os.path.join(path, item) + + if os.path.isdir(full_path): + fix_copyright(full_path) + elif is_source(item): + f = open(full_path, 'r') + source = f.read() + f.close() + + year_start = -1 + year_end = -1 + + i1 = source.find(COPYRIGHT) + if i1 != -1: + i1 += len(COPYRIGHT) + i2 = i1 + source[i1:].find(' ') + if i1 > 0: + try: + year_start = int(source[i1:i1 + 4]) + year_end = int(source[i1 + 6: i1 + 10]) + except ValueError: + pass + + if year_start > 0 and year_end < 0: + year_end = year_start + + year = datetime.date.today().year + if year_end < year: + result = '%s%d-%d%s' % (source[:i1], year_start, + year, source[i2:]) + f = open(full_path, 'w') + f.write(result) + f.close() + + +def cmd_fix_copyright(path): + fix_copyright(path) + + +if len(sys.argv) < 2: + cmd_help() +elif sys.argv[1] == 'build-snapshot': + cmd_build_snapshot() +elif sys.argv[1] == 'check-licenses': + cmd_check_licenses() +elif sys.argv[1] == 'fix-copyright' and len(sys.argv) > 2: + cmd_fix_copyright(sys.argv[2]) diff --git a/datastore/src/Makefile.am b/datastore/src/Makefile.am new file mode 100644 index 0000000..434face --- /dev/null +++ b/datastore/src/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = carquinyol diff --git a/datastore/src/carquinyol/Makefile.am b/datastore/src/carquinyol/Makefile.am new file mode 100644 index 0000000..7c56174 --- /dev/null +++ b/datastore/src/carquinyol/Makefile.am @@ -0,0 +1,23 @@ +datastoredir = $(pythondir)/carquinyol +datastore_PYTHON = \ + __init__.py \ + datastore.py \ + filestore.py \ + indexstore.py \ + layoutmanager.py \ + metadatastore.py \ + migration.py \ + optimizer.py + +AM_CPPFLAGS = \ + $(WARN_CFLAGS) \ + $(EXT_CFLAGS) \ + $(PYTHON_INCLUDES) + +AM_LDFLAGS = -module -avoid-version + +pkgpyexecdir = $(pythondir)/carquinyol +pkgpyexec_LTLIBRARIES = metadatareader.la + +metadatareader_la_SOURCES = \ + metadatareader.c diff --git a/datastore/src/carquinyol/__init__.py b/datastore/src/carquinyol/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/datastore/src/carquinyol/__init__.py diff --git a/datastore/src/carquinyol/datastore.py b/datastore/src/carquinyol/datastore.py new file mode 100644 index 0000000..82a6207 --- /dev/null +++ b/datastore/src/carquinyol/datastore.py @@ -0,0 +1,354 @@ +# Copyright (C) 2008, One Laptop Per Child +# Based on code Copyright (C) 2007, ObjectRealms, LLC +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

+import logging
+import uuid
+import time
+import os
+import traceback
+
+import dbus
+import dbus.service
+import gobject
+
+from sugar import mime
+
+from carquinyol import layoutmanager
+from carquinyol import migration
+from carquinyol.layoutmanager import MAX_QUERY_LIMIT
+from carquinyol.metadatastore import MetadataStore
+from carquinyol.indexstore import IndexStore
+from carquinyol.filestore import FileStore
+from carquinyol.optimizer import Optimizer
+
+# the name used by the logger
+DS_LOG_CHANNEL = 'org.laptop.sugar.DataStore'
+
+DS_SERVICE = "org.laptop.sugar.DataStore"
+DS_DBUS_INTERFACE = "org.laptop.sugar.DataStore"
+DS_OBJECT_PATH = "/org/laptop/sugar/DataStore"
+
+logger = logging.getLogger(DS_LOG_CHANNEL)
+
+
+class DataStore(dbus.service.Object):
+    """D-Bus API and logic for connecting all the other components.
+    """
+
+    def __init__(self, **options):
+        bus_name = dbus.service.BusName(DS_SERVICE,
+                                        bus=dbus.SessionBus(),
+                                        replace_existing=False,
+                                        allow_replacement=False)
+        dbus.service.Object.__init__(self, bus_name, DS_OBJECT_PATH)
+
+        migrated = self._migrate()
+
+        self._metadata_store = MetadataStore()
+        self._file_store = FileStore()
+        self._optimizer = Optimizer(self._file_store, self._metadata_store)
+        self._index_store = IndexStore()
+        self._index_updating = False
+
+        if migrated:
+            self._rebuild_index()
+            return
+
+        try:
+            self._index_store.open_index()
+        except Exception:
+            logging.exception('Failed to open index, will rebuild')
+            self._rebuild_index()
+            return
+
+        if not self._index_store.index_updated:
+            logging.debug('Index is not up-to-date, will update')
+            self._update_index()
+
+    def _migrate(self):
+        """Check version of data store on disk and migrate if necessary.
+
+        Returns True if migration was done and an index rebuild is required,
+        False otherwise.
+        """
+        layout_manager = layoutmanager.get_instance()
+        old_version = layout_manager.get_version()
+        if old_version == layoutmanager.CURRENT_LAYOUT_VERSION:
+            return False
+
+        if old_version == 0:
+            migration.migrate_from_0()
+
+        layout_manager.set_version(layoutmanager.CURRENT_LAYOUT_VERSION)
+        return True
+
+    def _rebuild_index(self):
+        """Remove and recreate index."""
+        self._index_store.close_index()
+        self._index_store.remove_index()
+        self._index_store.open_index()
+        self._update_index()
+
+    def _update_index(self):
+        """Find entries that are not yet in the index and add them."""
+        uids = layoutmanager.get_instance().find_all()
+        logging.debug('Going to update the index with object_ids %r',
+                      uids)
+        self._index_updating = True
+        gobject.idle_add(lambda: self.__update_index_cb(uids),
+                         priority=gobject.PRIORITY_LOW)
+
+    def __update_index_cb(self, uids):
+        if uids:
+            uid = uids.pop()
+
+            logging.debug('Updating entry %r in index. %d to go.', uid,
+                          len(uids))
+
+            if not self._index_store.contains(uid):
+                try:
+                    props = self._metadata_store.retrieve(uid)
+                    self._index_store.store(uid, props)
+                except Exception:
+                    logging.exception('Error processing %r', uid)
+
+        if not uids:
+            self._index_store.flush()
+            self._index_updating = False
+            logging.debug('Finished updating index.')
+            return False
+        else:
+            return True
+
+    def _create_completion_cb(self, async_cb, async_err_cb, uid, exc=None):
+        logger.debug('_create_completion_cb(%r, %r, %r, %r)', async_cb,
+                     async_err_cb, uid, exc)
+        if exc is not None:
+            async_err_cb(exc)
+            return
+
+        self.Created(uid)
+        self._optimizer.optimize(uid)
+        logger.debug('created %s', uid)
+        async_cb(uid)
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature='a{sv}sb',
+                         out_signature='s',
+                         async_callbacks=('async_cb', 'async_err_cb'),
+                         byte_arrays=True)
+    def create(self, props, file_path, transfer_ownership,
+               async_cb, async_err_cb):
+        uid = str(uuid.uuid4())
+        logging.debug('datastore.create %r', uid)
+
+        if not props.get('timestamp', ''):
+            props['timestamp'] = int(time.time())
+
+        self._metadata_store.store(uid, props)
+        self._index_store.store(uid, props)
+        self._file_store.store(uid, file_path, transfer_ownership,
+                lambda *args: self._create_completion_cb(async_cb,
+                                                         async_err_cb,
+                                                         uid,
+                                                         *args))
+
+    @dbus.service.signal(DS_DBUS_INTERFACE, signature="s")
+    def Created(self, uid):
+        pass
+
+    def _update_completion_cb(self, async_cb, async_err_cb, uid, exc=None):
+        logger.debug('_update_completion_cb() called with %r / %r, exc %r',
+                     async_cb, async_err_cb, exc)
+        if exc is not None:
+            async_err_cb(exc)
+            return
+
+        self.Updated(uid)
+        self._optimizer.optimize(uid)
+        logger.debug('updated %s', uid)
+        async_cb()
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature='sa{sv}sb',
+                         out_signature='',
+                         async_callbacks=('async_cb', 'async_err_cb'),
+                         byte_arrays=True)
+    def update(self, uid, props, file_path, transfer_ownership,
+               async_cb, async_err_cb):
+        logging.debug('datastore.update %r', uid)
+
+        if not props.get('timestamp', ''):
+            props['timestamp'] = int(time.time())
+
+        self._metadata_store.store(uid, props)
+        self._index_store.store(uid, props)
+
+        if os.path.exists(self._file_store.get_file_path(uid)) and \
+                (not file_path or os.path.exists(file_path)):
+            self._optimizer.remove(uid)
+        self._file_store.store(uid, file_path, transfer_ownership,
+                lambda *args: self._update_completion_cb(async_cb,
+                                                         async_err_cb,
+                                                         uid,
+                                                         *args))
+
+    @dbus.service.signal(DS_DBUS_INTERFACE, signature="s")
+    def Updated(self, uid):
+        pass
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature='a{sv}as',
+                         out_signature='aa{sv}u')
+    def find(self, query, properties):
+        logging.debug('datastore.find %r', query)
+        t = time.time()
+
+        if not self._index_updating:
+            try:
+                uids, count = self._index_store.find(query)
+            except Exception:
+                logging.exception('Failed to query index, will rebuild')
+                self._rebuild_index()
+
+        if self._index_updating:
+            logging.warning('Index updating, returning all entries')
+            return self._find_all(query, properties)
+
+        entries = []
+        for uid in uids:
+            entry_path = layoutmanager.get_instance().get_entry_path(uid)
+            if not os.path.exists(entry_path):
+                logging.warning(
+                    'Inconsistency detected, returning all entries')
+                self._rebuild_index()
+                return self._find_all(query, properties)
+
+            metadata = self._metadata_store.retrieve(uid, properties)
+            entries.append(metadata)
+
+        logger.debug('find(): %r', time.time() - t)
+
+        return entries, count
+
+    def _find_all(self, query, properties):
+        uids = layoutmanager.get_instance().find_all()
+        count = len(uids)
+
+        offset = query.get('offset', 0)
+        limit = query.get('limit', MAX_QUERY_LIMIT)
+        uids = uids[offset:offset + limit]
+
+        entries = []
+        for uid in uids:
+            metadata = self._metadata_store.retrieve(uid, properties)
+            entries.append(metadata)
+
+        return entries, count
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature='s',
+                         out_signature='s',
+                         sender_keyword='sender')
+    def get_filename(self, uid, sender=None):
+        logging.debug('datastore.get_filename %r', uid)
+        user_id = dbus.Bus().get_unix_user(sender)
+        extension = self._get_extension(uid)
+        return self._file_store.retrieve(uid, user_id, extension)
+
+    def _get_extension(self, uid):
+        mime_type = self._metadata_store.get_property(uid, 'mime_type')
+        if not mime_type:
+            return ''
+        return mime.get_primary_extension(mime_type)
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature='s',
+                         out_signature='a{sv}')
+    def get_properties(self, uid):
+        logging.debug('datastore.get_properties %r', uid)
+        metadata = self._metadata_store.retrieve(uid)
+        return metadata
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature='sa{sv}',
+                         out_signature='as')
+    def get_uniquevaluesfor(self, propertyname, query=None):
+        if propertyname != 'activity':
+            raise ValueError("Only 'activity' is a supported property name")
+        if query:
+            raise ValueError('The query parameter is not supported')
+        if not self._index_updating:
+            return self._index_store.get_activities()
+        else:
+            logging.warning('Index updating, returning an empty list')
+            return []
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature='s',
+                         out_signature='')
+    def delete(self, uid):
+        self._optimizer.remove(uid)
+
+        self._index_store.delete(uid)
+        self._file_store.delete(uid)
+        self._metadata_store.delete(uid)
+
+        entry_path = layoutmanager.get_instance().get_entry_path(uid)
+        os.removedirs(entry_path)
+
+        self.Deleted(uid)
+        logger.debug('deleted %s', uid)
+
+    @dbus.service.signal(DS_DBUS_INTERFACE, signature="s")
+    def Deleted(self, uid):
+        pass
+
+    def stop(self):
+        """shutdown the service"""
+        self._index_store.close_index()
+        self.Stopped()
+
+    @dbus.service.signal(DS_DBUS_INTERFACE)
+    def Stopped(self):
+        pass
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature="sa{sv}",
+                         out_signature='s')
+    def mount(self, uri, options=None):
+        return ''
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature="",
+                         out_signature="aa{sv}")
+    def mounts(self):
+        return [{'id': 1}]
+
+    @dbus.service.method(DS_DBUS_INTERFACE,
+                         in_signature="s",
+                         out_signature="")
+    def unmount(self, mountpoint_id):
+        pass
+
+    @dbus.service.signal(DS_DBUS_INTERFACE, signature="a{sv}")
+    def Mounted(self, descriptor):
+        pass
+
+    @dbus.service.signal(DS_DBUS_INTERFACE, signature="a{sv}")
+    def Unmounted(self, descriptor):
+        pass
diff --git a/datastore/src/carquinyol/filestore.py b/datastore/src/carquinyol/filestore.py
new file mode 100644
index 0000000..9724397
--- /dev/null
+++ b/datastore/src/carquinyol/filestore.py
@@ -0,0 +1,222 @@
+# Copyright (C) 2008, One Laptop Per Child
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

+import os
+import errno
+import logging
+import tempfile
+
+import gobject
+
+from carquinyol import layoutmanager
+
+
+class FileStore(object):
+    """Handle the storage of one file per entry.
+    """
+
+    # TODO: add protection against store and retrieve operations on entries
+    # that are being processed async.
+
+    def store(self, uid, file_path, transfer_ownership, completion_cb):
+        """Store a file for a given entry.
+
+        """
+        dir_path = layoutmanager.get_instance().get_entry_path(uid)
+        if not os.path.exists(dir_path):
+            os.makedirs(dir_path)
+
+        destination_path = layoutmanager.get_instance().get_data_path(uid)
+        if file_path:
+            if not os.path.isfile(file_path):
+                raise ValueError('No file at %r' % file_path)
+            if transfer_ownership:
+                try:
+                    logging.debug('FileStore moving from %r to %r', file_path,
+                                  destination_path)
+                    os.rename(file_path, destination_path)
+                    completion_cb()
+                except OSError, e:
+                    if e.errno == errno.EXDEV:
+                        self._async_copy(file_path, destination_path,
+                                         completion_cb, unlink_src=True)
+                    else:
+                        raise
+            else:
+                self._async_copy(file_path, destination_path, completion_cb,
+                                 unlink_src=False)
+            """
+            TODO: How can we support deleting the file of an entry?
+            elif not file_path and os.path.exists(destination_path):
+                logging.debug('FileStore: deleting %r' % destination_path)
+                os.remove(destination_path)
+                completion_cb()
+            """
+        else:
+            logging.debug('FileStore: Nothing to do')
+            completion_cb()
+
+    def _async_copy(self, file_path, destination_path, completion_cb,
+                    unlink_src):
+        """Start copying a file asynchronously.
+
+        """
+        logging.debug('FileStore copying from %r to %r', file_path,
+                      destination_path)
+        async_copy = AsyncCopy(file_path, destination_path, completion_cb,
+                               unlink_src)
+        async_copy.start()
+
+    def retrieve(self, uid, user_id, extension):
+        """Place the file associated to a given entry into a directory
+        where the user can read it. The caller is responsible for
+        deleting this file.
+
+        """
+        file_path = layoutmanager.get_instance().get_data_path(uid)
+        if not os.path.exists(file_path):
+            logging.debug("Entry %r doesn't have any file", uid)
+            return ''
+
+        use_instance_dir = os.path.exists('/etc/olpc-security') and \
+                           os.getuid() != user_id
+        if use_instance_dir:
+            if not user_id:
+                raise ValueError("Couldn't determine the current user uid.")
+            destination_dir = os.path.join(os.environ['HOME'], 'isolation',
+                '1', 'uid_to_instance_dir', str(user_id))
+        else:
+            profile = os.environ.get('SUGAR_PROFILE', 'default')
+            destination_dir = os.path.join(os.path.expanduser('~'), '.sugar',
+                profile, 'data')
+            if not os.path.exists(destination_dir):
+                os.makedirs(destination_dir)
+
+        if extension is None:
+            extension = ''
+        elif extension:
+            extension = '.' + extension
+
+        fd, destination_path = tempfile.mkstemp(prefix=uid + '_',
+            suffix=extension, dir=destination_dir)
+        os.close(fd)
+        os.unlink(destination_path)
+
+        # Try to hard link from the original file to the target path. This
+        # can fail if the file is in a different filesystem. Do a symlink
+        # instead.
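+        # (os.link() raises OSError with errno EXDEV when the source and
+        # destination are on different filesystems; the handler below falls
+        # back to a symlink in that case.)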
+ try: + os.link(file_path, destination_path) + except OSError, e: + if e.errno == errno.EXDEV: + os.symlink(file_path, destination_path) + else: + raise + + # Try to make the original file readable. This can fail if the file is + # in a FAT filesystem. + try: + os.chmod(file_path, 0604) + except OSError, e: + if e.errno != errno.EPERM: + raise + + return destination_path + + def get_file_path(self, uid): + return layoutmanager.get_instance().get_data_path(uid) + + def delete(self, uid): + """Remove the file associated to a given entry. + + """ + file_path = layoutmanager.get_instance().get_data_path(uid) + if os.path.exists(file_path): + os.remove(file_path) + + def hard_link_entry(self, new_uid, existing_uid): + existing_file = layoutmanager.get_instance().get_data_path(existing_uid) + new_file = layoutmanager.get_instance().get_data_path(new_uid) + + logging.debug('removing %r', new_file) + os.remove(new_file) + + logging.debug('hard linking %r -> %r', new_file, existing_file) + os.link(existing_file, new_file) + + +class AsyncCopy(object): + """Copy a file in chunks in the idle loop. + + """ + CHUNK_SIZE = 65536 + + def __init__(self, src, dest, completion, unlink_src=False): + self.src = src + self.dest = dest + self.completion = completion + self._unlink_src = unlink_src + self.src_fp = -1 + self.dest_fp = -1 + self.written = 0 + self.size = 0 + + def _cleanup(self): + os.close(self.src_fp) + os.close(self.dest_fp) + + def _copy_block(self, user_data=None): + try: + data = os.read(self.src_fp, AsyncCopy.CHUNK_SIZE) + count = os.write(self.dest_fp, data) + self.written += len(data) + + # error writing data to file? + if count < len(data): + logging.error('AC: Error writing %s -> %s: wrote less than ' + 'expected', self.src, self.dest) + self._complete(RuntimeError( + 'Error writing data to destination file')) + return False + + # FIXME: emit progress here + + # done? + if len(data) < AsyncCopy.CHUNK_SIZE: + self._complete(None) + return False + except Exception, err: + logging.error('AC: Error copying %s -> %s: %r', self.src, self. + dest, err) + self._complete(err) + return False + + return True + + def _complete(self, *args): + self._cleanup() + if self._unlink_src: + os.unlink(self.src) + self.completion(*args) + + def start(self): + self.src_fp = os.open(self.src, os.O_RDONLY) + self.dest_fp = os.open(self.dest, os.O_RDWR | os.O_TRUNC | os.O_CREAT, + 0644) + + stat = os.fstat(self.src_fp) + self.size = stat[6] + + gobject.idle_add(self._copy_block) diff --git a/datastore/src/carquinyol/indexstore.py b/datastore/src/carquinyol/indexstore.py new file mode 100644 index 0000000..62b843b --- /dev/null +++ b/datastore/src/carquinyol/indexstore.py @@ -0,0 +1,353 @@ +# Copyright (C) 2008, One Laptop Per Child +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import logging +import os +import sys + +import gobject +import xapian +from xapian import WritableDatabase, Document, Enquire, Query + +from carquinyol import layoutmanager +from carquinyol.layoutmanager import MAX_QUERY_LIMIT + +_VALUE_UID = 0 +_VALUE_TIMESTAMP = 1 +_VALUE_TITLE = 2 + +_PREFIX_NONE = 'N' +_PREFIX_FULL_VALUE = 'F' +_PREFIX_UID = 'Q' +_PREFIX_ACTIVITY = 'A' +_PREFIX_ACTIVITY_ID = 'I' +_PREFIX_MIME_TYPE = 'M' +_PREFIX_KEEP = 'K' + +# Force a flush every _n_ changes to the db +_FLUSH_THRESHOLD = 20 + +# Force a flush after _n_ seconds since the last change to the db +_FLUSH_TIMEOUT = 5 + +_PROPERTIES_NOT_TO_INDEX = ['timestamp', 'preview'] + +_MAX_RESULTS = int(2 ** 31 - 1) + +_QUERY_TERM_MAP = { + 'uid': _PREFIX_UID, + 'activity': _PREFIX_ACTIVITY, + 'activity_id': _PREFIX_ACTIVITY_ID, + 'mime_type': _PREFIX_MIME_TYPE, + 'keep': _PREFIX_KEEP, +} + +_QUERY_VALUE_MAP = { + 'timestamp': {'number': _VALUE_TIMESTAMP, 'type': float}, +} + + +class TermGenerator (xapian.TermGenerator): + + def index_document(self, document, properties): + document.add_value(_VALUE_TIMESTAMP, + xapian.sortable_serialise(float(properties['timestamp']))) + document.add_value(_VALUE_TITLE, properties.get('title', '').strip()) + + self.set_document(document) + + properties = dict(properties) + self._index_known(document, properties) + self._index_unknown(document, properties) + + def _index_known(self, document, properties): + for name, prefix in _QUERY_TERM_MAP.items(): + if (name not in properties): + continue + + self._index_property(document, name, properties.pop(name), prefix) + + def _index_unknown(self, document, properties): + for name, value in properties.items(): + self._index_property(document, name, value) + + def _index_property(self, doc, name, value, prefix=''): + if name in _PROPERTIES_NOT_TO_INDEX or not value: + return + + if isinstance(value, unicode): + value = value.encode('utf-8') + elif not isinstance(value, basestring): + value = str(value) + + # We need to add the full value (i.e. not split into words) so + # we can enumerate unique values. It also simplifies setting up + # dictionary-based queries. + if prefix: + doc.add_term(_PREFIX_FULL_VALUE + prefix + value) + + self.index_text(value, 1, prefix or _PREFIX_NONE) + self.increase_termpos() + + +class QueryParser (xapian.QueryParser): + """QueryParser that understands dictionaries and Xapian query strings. + + The dictionary contains metadata names as keys and either basic types + (exact match), 2-tuples (range, only valid for value-stored metadata) + or a list (multiple exact matches joined with OR) as values. + An empty dictionary matches everything. Queries from different keys + (i.e. different metadata names) are joined with AND. 
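+
+    An illustrative query dictionary (the values are made up):
+
+        {'activity': 'org.laptop.WebActivity',        # exact match
+         'mime_type': ['text/plain', 'text/html'],    # OR of exact matches
+         'timestamp': (1230000000, 1240000000)}       # range query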
+ """ + + def __init__(self): + xapian.QueryParser.__init__(self) + + for name, prefix in _QUERY_TERM_MAP.items(): + self.add_prefix(name, prefix) + self.add_prefix('', prefix) + + self.add_prefix('', _PREFIX_NONE) + + def _parse_query_term(self, name, prefix, value): + if isinstance(value, list): + subqueries = [self._parse_query_term(name, prefix, word) + for word in value] + return Query(Query.OP_OR, subqueries) + + elif prefix: + return Query(_PREFIX_FULL_VALUE + prefix + str(value)) + else: + return Query(_PREFIX_NONE + str(value)) + + def _parse_query_value_range(self, name, info, value): + if len(value) != 2: + raise TypeError( + 'Only tuples of size 2 have a defined meaning. ' + 'Did you mean to pass a list instead?') + + start, end = value + return Query(Query.OP_VALUE_RANGE, info['number'], + self._convert_value(info, start), self._convert_value(info, end)) + + def _convert_value(self, info, value): + if info['type'] in (float, int, long): + return xapian.sortable_serialise(info['type'](value)) + + return str(info['type'](value)) + + def _parse_query_value(self, name, info, value): + if isinstance(value, list): + subqueries = [self._parse_query_value(name, info, word) + for word in value] + return Query(Query.OP_OR, subqueries) + + elif isinstance(value, tuple): + return self._parse_query_value_range(name, info, value) + + elif isinstance(value, dict): + # compatibility option for timestamp: {'start': 0, 'end': 1} + start = value.get('start', 0) + end = value.get('end', sys.maxint) + return self._parse_query_value_range(name, info, (start, end)) + + else: + return self._parse_query_value_range(name, info, (value, value)) + + def _parse_query_xapian(self, query_str): + try: + return xapian.QueryParser.parse_query( + self, query_str, + QueryParser.FLAG_PHRASE | + QueryParser.FLAG_BOOLEAN | + QueryParser.FLAG_LOVEHATE | + QueryParser.FLAG_WILDCARD, + '') + + except xapian.QueryParserError, exception: + logging.warning('Invalid query string: ' + exception.get_msg()) + return Query() + + def parse_query(self, query_dict, query_string): + logging.debug('parse_query %r %r', query_dict, query_string) + queries = [] + query_dict = dict(query_dict) + + if query_string is not None: + queries.append(self._parse_query_xapian(str(query_string))) + + for name, value in query_dict.items(): + if name in _QUERY_TERM_MAP: + queries.append(self._parse_query_term(name, + _QUERY_TERM_MAP[name], value)) + elif name in _QUERY_VALUE_MAP: + queries.append(self._parse_query_value(name, + _QUERY_VALUE_MAP[name], value)) + else: + logging.warning('Unknown term: %r=%r', name, value) + + if not queries: + queries.append(Query('')) + + logging.debug('queries: %r', [str(q) for q in queries]) + return Query(Query.OP_AND, queries) + + +class IndexStore(object): + """Index metadata and provide rich query facilities on it. 
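+
+    Backed by a Xapian database; queries are parsed by the QueryParser
+    defined above.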
+ """ + + def __init__(self): + self._database = None + self._flush_timeout = None + self._pending_writes = 0 + self._index_updated_path = os.path.join( + layoutmanager.get_instance().get_root_path(), 'index_updated') + + def open_index(self): + index_path = layoutmanager.get_instance().get_index_path() + self._database = WritableDatabase(index_path, xapian.DB_CREATE_OR_OPEN) + + def close_index(self): + """Close index database if it is open.""" + if not self._database: + return + + self._flush(True) + self._database = None + + def remove_index(self): + index_path = layoutmanager.get_instance().get_index_path() + if not os.path.exists(index_path): + return + for f in os.listdir(index_path): + os.remove(os.path.join(index_path, f)) + + def contains(self, uid): + postings = self._database.postlist(_PREFIX_FULL_VALUE + \ + _PREFIX_UID + uid) + try: + postlist_item = postings.next() + except StopIteration: + return False + return True + + def store(self, uid, properties): + document = Document() + document.add_value(_VALUE_UID, uid) + term_generator = TermGenerator() + term_generator.index_document(document, properties) + + if not self.contains(uid): + self._database.add_document(document) + else: + self._database.replace_document(_PREFIX_FULL_VALUE + \ + _PREFIX_UID + uid, document) + + self._flush() + + def find(self, query): + offset = query.pop('offset', 0) + limit = query.pop('limit', MAX_QUERY_LIMIT) + order_by = query.pop('order_by', []) + query_string = query.pop('query', None) + + query_parser = QueryParser() + query_parser.set_database(self._database) + enquire = Enquire(self._database) + enquire.set_query(query_parser.parse_query(query, query_string)) + + # This will assure that the results count is exact. + check_at_least = offset + limit + 1 + + if not order_by: + order_by = '+timestamp' + else: + order_by = order_by[0] + + if order_by == '+timestamp': + enquire.set_sort_by_value(_VALUE_TIMESTAMP, True) + elif order_by == '-timestamp': + enquire.set_sort_by_value(_VALUE_TIMESTAMP, False) + elif order_by == '+title': + enquire.set_sort_by_value(_VALUE_TITLE, True) + elif order_by == '-title': + enquire.set_sort_by_value(_VALUE_TITLE, False) + else: + logging.warning('Unsupported property for sorting: %s', order_by) + + query_result = enquire.get_mset(offset, limit, check_at_least) + total_count = query_result.get_matches_estimated() + + uids = [] + for hit in query_result: + uids.append(hit.document.get_value(_VALUE_UID)) + + return (uids, total_count) + + def delete(self, uid): + self._database.delete_document(_PREFIX_FULL_VALUE + _PREFIX_UID + uid) + self._flush() + + def get_activities(self): + activities = [] + prefix = _PREFIX_FULL_VALUE + _PREFIX_ACTIVITY + for term in self._database.allterms(prefix): + activities.append(term.term[len(prefix):]) + return activities + + def flush(self): + self._flush(True) + + def get_index_updated(self): + return os.path.exists(self._index_updated_path) + + index_updated = property(get_index_updated) + + def _set_index_updated(self, index_updated): + if index_updated != self.index_updated: + if index_updated: + index_updated_file = open(self._index_updated_path, 'w') + # index_updated = True will happen every + # indexstore._FLUSH_TIMEOUT seconds, so it is ok to fsync + os.fsync(index_updated_file.fileno()) + index_updated_file.close() + else: + os.remove(self._index_updated_path) + + def _flush_timeout_cb(self): + self._flush(True) + return False + + def _flush(self, force=False): + """Called after any database mutation""" + 
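+        # Flush policy, tying together the module constants above: every
+        # mutation bumps _pending_writes; once it exceeds _FLUSH_THRESHOLD
+        # changes, or whenever force=True, the Xapian database is flushed
+        # right away.  Otherwise a one-shot _FLUSH_TIMEOUT-second timer is
+        # (re)armed so that an idle datastore still reaches disk shortly
+        # after its last change.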
logging.debug('IndexStore.flush: force=%r _pending_writes=%r', + force, self._pending_writes) + + self._set_index_updated(False) + + if self._flush_timeout is not None: + gobject.source_remove(self._flush_timeout) + self._flush_timeout = None + + self._pending_writes += 1 + if force or self._pending_writes > _FLUSH_THRESHOLD: + self._database.flush() + self._pending_writes = 0 + self._set_index_updated(True) + else: + self._flush_timeout = gobject.timeout_add_seconds(_FLUSH_TIMEOUT, + self._flush_timeout_cb) diff --git a/datastore/src/carquinyol/layoutmanager.py b/datastore/src/carquinyol/layoutmanager.py new file mode 100644 index 0000000..5c67203 --- /dev/null +++ b/datastore/src/carquinyol/layoutmanager.py @@ -0,0 +1,112 @@ +# Copyright (C) 2008, One Laptop Per Child +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import os +import logging + +MAX_QUERY_LIMIT = 40960 +CURRENT_LAYOUT_VERSION = 4 + +class LayoutManager(object): + """Provide the logic about how entries are stored inside the datastore + directory + """ + + def __init__(self): + profile = os.environ.get('SUGAR_PROFILE', 'default') + base_dir = os.path.join(os.path.expanduser('~'), '.sugar', profile) + + self._root_path = os.path.join(base_dir, 'datastore') + + if not os.path.exists(self._root_path): + os.makedirs(self._root_path) + + self._create_if_needed(self.get_checksums_dir()) + self._create_if_needed(self.get_queue_path()) + + def _create_if_needed(self, path): + if not os.path.exists(path): + os.makedirs(path) + + def get_version(self): + version_path = os.path.join(self._root_path, 'version') + version = 0 + if os.path.exists(version_path): + try: + version = int(open(version_path, 'r').read()) + except ValueError: + logging.exception('Can not read layout version') + version = 0 + + return version + + def set_version(self, version): + version_path = os.path.join(self._root_path, 'version') + open(version_path, 'w').write(str(version)) + + def get_entry_path(self, uid): + # os.path.join() is just too slow + return '%s/%s/%s' % (self._root_path, uid[:2], uid) + + def get_data_path(self, uid): + return '%s/%s/%s/data' % (self._root_path, uid[:2], uid) + + def get_metadata_path(self, uid): + return '%s/%s/%s/metadata' % (self._root_path, uid[:2], uid) + + def get_root_path(self): + return self._root_path + + def get_index_path(self): + return os.path.join(self._root_path, 'index') + + def get_checksums_dir(self): + return os.path.join(self._root_path, 'checksums') + + def get_queue_path(self): + return os.path.join(self.get_checksums_dir(), 'queue') + + def find_all(self): + uids = [] + for f in os.listdir(self._root_path): + if os.path.isdir(os.path.join(self._root_path, f)) and len(f) == 2: + for g in os.listdir(os.path.join(self._root_path, f)): + if len(g) == 36: + uids.append(g) + return uids + + def _is_empty(self): + """Check if there is any existing entry. 
+ + All data store layout versions are handled. Will err on the safe + side (i.e. return False if there might be any entry).""" + if os.path.exists(os.path.join(self._root_path, 'store')): + # unmigrated 0.82 data store + return False + + for f in os.listdir(self._root_path): + if os.path.isdir(os.path.join(self._root_path, f)) and len(f) == 2: + for g in os.listdir(os.path.join(self._root_path, f)): + if len(g) == 36: + return False + return True + +_instance = None +def get_instance(): + global _instance + if _instance is None: + _instance = LayoutManager() + return _instance diff --git a/datastore/src/carquinyol/metadatareader.c b/datastore/src/carquinyol/metadatareader.c new file mode 100644 index 0000000..454c8c3 --- /dev/null +++ b/datastore/src/carquinyol/metadatareader.c @@ -0,0 +1,233 @@ +#include "Python.h" + +#include + +// TODO: put it in a place where python can use it when writing metadata +#define MAX_PROPERTY_LENGTH 500 * 1024 + +static PyObject *byte_array_type = NULL; + +int +add_property(const char *metadata_path, char *property_name, PyObject *dict, + int must_exist) +{ + int file_path_size; + char *file_path = NULL; + FILE *file = NULL; + long file_size; + char *value_buf = NULL; + PyObject *value = NULL; + struct stat file_stat; + + // Build path of the property file + file_path_size = strlen(metadata_path) + 1 + strlen(property_name) + 1; + file_path = PyMem_Malloc(file_path_size); + if (file_path == NULL) { + PyErr_NoMemory(); + return 0; + } + snprintf (file_path, file_path_size, "%s/%s", metadata_path, property_name); + + if ((!must_exist) && (stat(file_path, &file_stat) != 0)) { + PyMem_Free(file_path); + return 1; + } + + file = fopen(file_path, "r"); + if (file == NULL) { + char buf[256]; + snprintf(buf, sizeof(buf), "Cannot open property file %s: %s", + file_path, strerror(errno)); + PyErr_SetString(PyExc_IOError, buf); + goto cleanup; + } + + // Get file size + fseek (file, 0, SEEK_END); + file_size = ftell (file); + rewind (file); + + if (file_size == 0) { + // Empty property + fclose(file); + file = NULL; + + value = PyString_FromString(""); + if (value == NULL) { + PyErr_SetString(PyExc_ValueError, + "Failed to convert value to python string"); + goto cleanup; + } + } else { + if (file_size > MAX_PROPERTY_LENGTH) { + PyErr_SetString(PyExc_ValueError, "Property file too big"); + goto cleanup; + } + + // Read the whole file + value_buf = PyMem_Malloc(file_size); + if (value_buf == NULL) { + PyErr_NoMemory(); + goto cleanup; + } + long read_size = fread(value_buf, 1, file_size, file); + if (read_size < file_size) { + char buf[256]; + snprintf(buf, sizeof(buf), + "Error while reading property file %s", file_path); + PyErr_SetString(PyExc_IOError, buf); + goto cleanup; + } + + fclose(file); + file = NULL; + + // Convert value to dbus.ByteArray + PyObject *args = Py_BuildValue("(s#)", value_buf, file_size); + + PyMem_Free(value_buf); + value_buf = NULL; + + value = PyObject_CallObject(byte_array_type, args); + Py_DECREF(args); + + if (value == NULL) { + PyErr_SetString(PyExc_ValueError, + "Failed to convert value to dbus.ByteArray"); + goto cleanup; + } + } + + // Add property to the metadata dict + if (PyDict_SetItemString(dict, property_name, value) == -1) { + PyErr_SetString(PyExc_ValueError, + "Failed to add property to dictionary"); + goto cleanup; + } + + Py_DECREF(value); + PyMem_Free(file_path); + + return 1; + +cleanup: + if (file_path) { + PyMem_Free(file_path); + } + if (value_buf) { + PyMem_Free(value_buf); + } + if (file) { + fclose(file); + } + if 
(value) { + Py_DECREF(value); + } + return 0; +} + +static PyObject * +read_from_properties_list (const char *metadata_path, PyObject *properties) +{ + PyObject *dict = PyDict_New(); + + int i; + for (i = 0; i < PyList_Size(properties); i++) { + PyObject *property = PyList_GetItem(properties, i); + char *property_name = PyString_AsString (property); + + if (add_property(metadata_path, property_name, dict, 0) == 0) + goto cleanup; + } + + return dict; + +cleanup: + if (dict) { + Py_DECREF(dict); + } + return NULL; +} + +static PyObject * +read_all_properties (const char *metadata_path) +{ + PyObject *dict = PyDict_New(); + DIR *dir_stream = NULL; + struct dirent *dir_entry = NULL; + + dir_stream = opendir (metadata_path); + if (dir_stream == NULL) { + char buf[256]; + snprintf(buf, sizeof(buf), "Couldn't open metadata directory %s", + metadata_path); + PyErr_SetString(PyExc_IOError, buf); + goto cleanup; + } + + dir_entry = readdir(dir_stream); + while (dir_entry != NULL) { + // Skip . and .. + if (dir_entry->d_name[0] == '.' && + (strlen(dir_entry->d_name) == 1 || + (dir_entry->d_name[1] == '.' && + strlen(dir_entry->d_name) == 2))) + goto next_property; + + if (add_property(metadata_path, dir_entry->d_name, dict, 1) == 0) + goto cleanup; + + next_property: + dir_entry = readdir(dir_stream); + } + + closedir(dir_stream); + + return dict; + +cleanup: + if (dict) { + Py_DECREF(dict); + } + if (dir_stream) { + closedir(dir_stream); + } + return NULL; +} + +static PyObject * +metadatareader_retrieve(PyObject *unused, PyObject *args) +{ + PyObject *dict = NULL; + PyObject *properties = NULL; + const char *metadata_path = NULL; + + if (!PyArg_ParseTuple(args, "sO:retrieve", &metadata_path, &properties)) + return NULL; + + if ((properties != Py_None) && (PyList_Size(properties) > 0)) { + dict = read_from_properties_list(metadata_path, properties); + } else { + dict = read_all_properties(metadata_path); + } + + return dict; +} + +static PyMethodDef metadatareader_functions[] = { + {"retrieve", metadatareader_retrieve, METH_VARARGS, + PyDoc_STR("Read a dictionary from a directory with a single file " \ + "(containing the content) per key")}, + {NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC +initmetadatareader(void) +{ + PyObject* mod; + mod = Py_InitModule("metadatareader", metadatareader_functions); + + PyObject *dbus_module = PyImport_ImportModule("dbus"); + byte_array_type = PyObject_GetAttrString(dbus_module, "ByteArray"); +} + diff --git a/datastore/src/carquinyol/metadatastore.py b/datastore/src/carquinyol/metadatastore.py new file mode 100644 index 0000000..5967017 --- /dev/null +++ b/datastore/src/carquinyol/metadatastore.py @@ -0,0 +1,60 @@ +import os + +from carquinyol import layoutmanager +from carquinyol import metadatareader + +MAX_SIZE = 256 +_INTERNAL_KEYS = ['checksum'] + + +class MetadataStore(object): + + def store(self, uid, metadata): + metadata_path = layoutmanager.get_instance().get_metadata_path(uid) + if not os.path.exists(metadata_path): + os.makedirs(metadata_path) + else: + for key in os.listdir(metadata_path): + if key not in _INTERNAL_KEYS: + os.remove(os.path.join(metadata_path, key)) + + metadata['uid'] = uid + for key, value in metadata.items(): + + # Hack to support activities that still pass properties named as + # for example title:text. 
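+            # (e.g. a key named 'title:text' is written to a file called
+            # plain 'title'; everything after the first ':' is dropped)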
+ if ':' in key: + key = key.split(':', 1)[0] + + f = open(os.path.join(metadata_path, key), 'w') + try: + if isinstance(value, unicode): + value = value.encode('utf-8') + elif not isinstance(value, basestring): + value = str(value) + f.write(value) + finally: + f.close() + + def retrieve(self, uid, properties=None): + metadata_path = layoutmanager.get_instance().get_metadata_path(uid) + return metadatareader.retrieve(metadata_path, properties) + + def delete(self, uid): + metadata_path = layoutmanager.get_instance().get_metadata_path(uid) + for key in os.listdir(metadata_path): + os.remove(os.path.join(metadata_path, key)) + os.rmdir(metadata_path) + + def get_property(self, uid, key): + metadata_path = layoutmanager.get_instance().get_metadata_path(uid) + property_path = os.path.join(metadata_path, key) + if os.path.exists(property_path): + return open(property_path, 'r').read() + else: + return None + + def set_property(self, uid, key, value): + metadata_path = layoutmanager.get_instance().get_metadata_path(uid) + property_path = os.path.join(metadata_path, key) + open(property_path, 'w').write(value) diff --git a/datastore/src/carquinyol/migration.py b/datastore/src/carquinyol/migration.py new file mode 100644 index 0000000..95ee391 --- /dev/null +++ b/datastore/src/carquinyol/migration.py @@ -0,0 +1,103 @@ +# Copyright (C) 2008, One Laptop Per Child +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +"""Transform one DataStore directory in a newer format. +""" + +import os +import logging +import shutil +import time +import traceback + +import cjson + +from carquinyol import layoutmanager + +DATE_FORMAT = '%Y-%m-%dT%H:%M:%S' + + +def migrate_from_0(): + logging.info('Migrating datastore from version 0 to version 1') + + root_path = layoutmanager.get_instance().get_root_path() + old_root_path = os.path.join(root_path, 'store') + if not os.path.exists(old_root_path): + return + + for f in os.listdir(old_root_path): + uid, ext = os.path.splitext(f) + if ext != '.metadata': + continue + + logging.debug('Migrating entry %r', uid) + try: + _migrate_metadata(root_path, old_root_path, uid) + _migrate_file(root_path, old_root_path, uid) + _migrate_preview(root_path, old_root_path, uid) + except Exception: + logging.exception('Error while migrating entry %r', uid) + + # Just be paranoid, it's cheap. 
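+    # (only delete the old tree when the path really looks like the
+    # pre-migration 'datastore/store' directory)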
+ if old_root_path.endswith('datastore/store'): + shutil.rmtree(old_root_path) + + logging.info('Migration finished') + + +def _migrate_metadata(root_path, old_root_path, uid): + dir_path = layoutmanager.get_instance().get_entry_path(uid) + metadata_path = layoutmanager.get_instance().get_metadata_path(uid) + os.makedirs(metadata_path) + + old_metadata_path = os.path.join(old_root_path, uid + '.metadata') + metadata = cjson.decode(open(old_metadata_path, 'r').read()) + + if 'uid' not in metadata: + metadata['uid'] = uid + + if 'timestamp' not in metadata and 'mtime' in metadata: + metadata['timestamp'] = \ + time.mktime(time.strptime(metadata['mtime'], DATE_FORMAT)) + + for key, value in metadata.items(): + try: + f = open(os.path.join(metadata_path, key), 'w') + try: + if isinstance(value, unicode): + value = value.encode('utf-8') + if not isinstance(value, basestring): + value = str(value) + f.write(value) + finally: + f.close() + except Exception: + logging.exception( + 'Error while migrating property %s of entry %s', key, uid) + + +def _migrate_file(root_path, old_root_path, uid): + if os.path.exists(os.path.join(old_root_path, uid)): + new_data_path = layoutmanager.get_instance().get_data_path(uid) + os.rename(os.path.join(old_root_path, uid), + new_data_path) + + +def _migrate_preview(root_path, old_root_path, uid): + dir_path = layoutmanager.get_instance().get_entry_path(uid) + metadata_path = layoutmanager.get_instance().get_metadata_path(uid) + os.rename(os.path.join(old_root_path, 'preview', uid), + os.path.join(metadata_path, 'preview')) diff --git a/datastore/src/carquinyol/optimizer.py b/datastore/src/carquinyol/optimizer.py new file mode 100644 index 0000000..2b6ce29 --- /dev/null +++ b/datastore/src/carquinyol/optimizer.py @@ -0,0 +1,167 @@ +# Copyright (C) 2008, One Laptop Per Child +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import os +import errno +import logging +import subprocess + +import gobject + +from carquinyol import layoutmanager + + +class Optimizer(object): + """Optimizes disk space usage by detecting duplicates and sharing storage. + """ + + def __init__(self, file_store, metadata_store): + self._file_store = file_store + self._metadata_store = metadata_store + self._enqueue_checksum_id = None + + def optimize(self, uid): + """Add an entry to a queue of entries to be checked for duplicates. 
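+
+        The queue is simply a directory of empty files named after the
+        uids; _process_entry_cb() drains it one entry per invocation from
+        a low-priority idle callback.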
+ + """ + if not os.path.exists(self._file_store.get_file_path(uid)): + return + + queue_path = layoutmanager.get_instance().get_queue_path() + open(os.path.join(queue_path, uid), 'w').close() + logging.debug('optimize %r', os.path.join(queue_path, uid)) + + if self._enqueue_checksum_id is None: + self._enqueue_checksum_id = \ + gobject.idle_add(self._process_entry_cb, + priority=gobject.PRIORITY_LOW) + + def remove(self, uid): + """Remove any structures left from space optimization + + """ + checksum = self._metadata_store.get_property(uid, 'checksum') + if checksum is None: + return + + checksums_dir = layoutmanager.get_instance().get_checksums_dir() + checksum_path = os.path.join(checksums_dir, checksum) + checksum_entry_path = os.path.join(checksum_path, uid) + + if os.path.exists(checksum_entry_path): + logging.debug('remove %r', checksum_entry_path) + os.remove(checksum_entry_path) + + if os.path.exists(checksum_path): + try: + os.rmdir(checksum_path) + logging.debug('removed %r', checksum_path) + except OSError, e: + if e.errno != errno.ENOTEMPTY: + raise + + def _identical_file_already_exists(self, checksum): + """Check if we already have files with this checksum. + + """ + checksums_dir = layoutmanager.get_instance().get_checksums_dir() + checksum_path = os.path.join(checksums_dir, checksum) + return os.path.exists(checksum_path) + + def _get_uid_from_checksum(self, checksum): + """Get an existing entry which file matches checksum. + + """ + checksums_dir = layoutmanager.get_instance().get_checksums_dir() + checksum_path = os.path.join(checksums_dir, checksum) + first_uid = os.listdir(checksum_path)[0] + return first_uid + + def _create_checksum_dir(self, checksum): + """Create directory that tracks files with this same checksum. + + """ + checksums_dir = layoutmanager.get_instance().get_checksums_dir() + checksum_path = os.path.join(checksums_dir, checksum) + logging.debug('create dir %r', checksum_path) + os.mkdir(checksum_path) + + def _add_checksum_entry(self, uid, checksum): + """Create a file in the checksum dir with the uid of the entry + + """ + checksums_dir = layoutmanager.get_instance().get_checksums_dir() + checksum_path = os.path.join(checksums_dir, checksum) + + logging.debug('touch %r', os.path.join(checksum_path, uid)) + open(os.path.join(checksum_path, uid), 'w').close() + + def _already_linked(self, uid, checksum): + """Check if this entry's file is already a hard link to the checksums + dir. + + """ + checksums_dir = layoutmanager.get_instance().get_checksums_dir() + checksum_path = os.path.join(checksums_dir, checksum) + return os.path.exists(os.path.join(checksum_path, uid)) + + def _process_entry_cb(self): + """Process one item in the checksums queue by calculating its checksum, + checking if there exist already an identical file, and in that case + substituting its file with a hard link to that pre-existing file. 
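+
+        Returns True while more queued entries remain (so the idle
+        callback stays scheduled) and False once the queue is drained.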
+ + """ + queue_path = layoutmanager.get_instance().get_queue_path() + queue = os.listdir(queue_path) + if queue: + uid = queue[0] + logging.debug('_process_entry_cb processing %r', uid) + + file_in_entry_path = self._file_store.get_file_path(uid) + if not os.path.exists(file_in_entry_path): + logging.info('non-existent entry in queue: %r', uid) + else: + checksum = self._calculate_md5sum(file_in_entry_path) + self._metadata_store.set_property(uid, 'checksum', checksum) + + if self._identical_file_already_exists(checksum): + if not self._already_linked(uid, checksum): + existing_entry_uid = \ + self._get_uid_from_checksum(checksum) + + self._file_store.hard_link_entry(uid, + existing_entry_uid) + + self._add_checksum_entry(uid, checksum) + else: + self._create_checksum_dir(checksum) + self._add_checksum_entry(uid, checksum) + + os.remove(os.path.join(queue_path, uid)) + + if len(queue) <= 1: + self._enqueue_checksum_id = None + return False + else: + return True + + def _calculate_md5sum(self, path): + """Calculate the md5 checksum of a given file. + + """ + popen = subprocess.Popen(['md5sum', path], stdout=subprocess.PIPE) + stdout, stderr_ = popen.communicate() + return stdout.split(' ', 1)[0] -- cgit v0.9.1