author     Marco Pesenti Gritti <marco@marcopg.org>  2010-08-22 02:45:43 (GMT)
committer  Marco Pesenti Gritti <marco@marcopg.org>  2010-08-22 02:45:43 (GMT)
commit     192ad89c9ade5dd9c38cfe3280666f3a2c7c17d3 (patch)
tree       2ab249ebe5ab915fc48231789d4cd1c4d5be18eb
parent     415b048faad644549d270817b5fb8910c2752bdb (diff)
parent     0c3d1b3aaeb0ca69693aa325e32e143a9fae047f (diff)

Merge datastore module in a subdirectory
Diffstat:

 -rw-r--r--  datastore/.gitignore                                  35
 -rw-r--r--  datastore/AUTHORS                                     24
 -rw-r--r--  datastore/COPYING                                    222
 -rw-r--r--  datastore/Makefile.am                                  7
 -rw-r--r--  datastore/NEWS                                        41
 -rw-r--r--  datastore/README                                      41
 -rwxr-xr-x  datastore/autogen.sh                                   5
 -rw-r--r--  datastore/bin/Makefile.am                              6
 -rwxr-xr-x  datastore/bin/copy-from-journal                      121
 -rwxr-xr-x  datastore/bin/copy-to-journal                         97
 -rwxr-xr-x  datastore/bin/datastore-service                       58
 -rw-r--r--  datastore/configure.ac                                24
 -rw-r--r--  datastore/etc/.gitignore                               1
 -rw-r--r--  datastore/etc/Makefile.am                             12
 -rw-r--r--  datastore/etc/org.laptop.sugar.DataStore.service.in    3
 -rw-r--r--  datastore/m4/python.m4                                62
 -rwxr-xr-x  datastore/maint-helper.py                            230
 -rw-r--r--  datastore/src/Makefile.am                              1
 -rw-r--r--  datastore/src/carquinyol/Makefile.am                  23
 -rw-r--r--  datastore/src/carquinyol/__init__.py                   0
 -rw-r--r--  datastore/src/carquinyol/datastore.py                354
 -rw-r--r--  datastore/src/carquinyol/filestore.py                222
 -rw-r--r--  datastore/src/carquinyol/indexstore.py               353
 -rw-r--r--  datastore/src/carquinyol/layoutmanager.py            112
 -rw-r--r--  datastore/src/carquinyol/metadatareader.c            233
 -rw-r--r--  datastore/src/carquinyol/metadatastore.py             60
 -rw-r--r--  datastore/src/carquinyol/migration.py                103
 -rw-r--r--  datastore/src/carquinyol/optimizer.py                167

28 files changed, 2617 insertions(+), 0 deletions(-)
diff --git a/datastore/.gitignore b/datastore/.gitignore
new file mode 100644
index 0000000..12965e2
--- /dev/null
+++ b/datastore/.gitignore
@@ -0,0 +1,35 @@
+# Generic
+
+*.pyc
+*~
+*.deps
+*.libs
+*.la
+*.lo
+*.loT
+*.service
+
+# Absolute
+Makefile
+Makefile.in
+aclocal.m4
+autom4te.cache
+compile
+config.guess
+config.log
+config.status
+config.sub
+configure
+depcomp
+install-sh
+libtool
+ltmain.sh
+missing
+mkinstalldirs
+py-compile
+stamp-h1
+m4/libtool.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+m4/lt~obsolete.m4
diff --git a/datastore/AUTHORS b/datastore/AUTHORS
new file mode 100644
index 0000000..a2c2720
--- /dev/null
+++ b/datastore/AUTHORS
@@ -0,0 +1,24 @@
+Contributors
+============
+
+Aleksey Lim <alsroot@member.fsf.org>
+Benjamin Saller <bcsaller@objectrealms.net>
+Bernie Innocenti <bernie@codewiz.org>
+Bert Freudenberg <bert@freudenbergs.de>
+Dan Williams <dcbw@redhat.com>
+Marco Pesenti Gritti <mpg@redhat.com>
+Sascha Silbe <sascha@silbe.org>
+Simon McVittie <simon.mcvittie@collabora.co.uk>
+Simon Schampijer <simon@schampijer.de>
+Tomeu Vizoso <tomeu@sugarlabs.org>
+Wade Brainerd <wadetb@gmail.com>
+
+Past maintainers
+================
+
+Tomeu Vizoso <tomeu@sugarlabs.org>
+
+Current maintainers
+===================
+
+Aleksey Lim <alsroot@member.fsf.org>
diff --git a/datastore/COPYING b/datastore/COPYING
new file mode 100644
index 0000000..ba9543b
--- /dev/null
+++ b/datastore/COPYING
@@ -0,0 +1,222 @@
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
diff --git a/datastore/Makefile.am b/datastore/Makefile.am
new file mode 100644
index 0000000..bfebefe
--- /dev/null
+++ b/datastore/Makefile.am
@@ -0,0 +1,7 @@
+ACLOCAL_AMFLAGS = -I m4
+
+SUBDIRS = bin etc src
+
+test:
+ @cd tests
+ $(MAKE) -C tests test
diff --git a/datastore/NEWS b/datastore/NEWS
new file mode 100644
index 0000000..0a765be
--- /dev/null
+++ b/datastore/NEWS
@@ -0,0 +1,41 @@
+v0.89.2 2010-08-04
+* sl#2132: reduce _FLUSH_TIMEOUT to 5 seconds
+* Set index_updated flag on ds shutting down #2095
+
+v0.89.1 2010-07-14
+* Invert index_updated logic #2095
+
+v0.88.0 2010-03-29
+* fix migration from 0.82 data store (#1838)
+
+v0.87.4 2010-03-10
+* Do not remove internal metadata fields while rewriting object #1813
+* Auto detect mime type in copy-to-journal #586 (tomeu)
+
+v0.87.3 2010-03-08
+* rebuild index on migration #1787
+
+v0.87.2 2009-12-21
+* copy-to-journal dbus error #573
+* file descriptor leak in filestore.retrieve() #1550
+* range queries use lexical comparison, not numerical #1342
+* add and use type information for Xapian value-stored properties #1437
+
+v0.87.1 2009-12-01
+* Make reading version file more robust (sayamindu) #1562
+* Use gobject_timeout_add_seconds to make power usage more efficient (sayamindu) #1567
+
+v0.86.2 2010-03-08
+* rebuild index on migration #1787
+
+v0.86.1 2009-10-01
+* Screenshot file is not deleted #1445
+
+v0.86.0 2009-09-21
+* fix 'error return without exception set' on ARM (#1404)
+* eliminate incorrect warning about unknown search terms (#1363)
+* Fix AUTHORS file
+* Add AM_MAINTAINER_MODE macro to configure.ac
+
+v0.85.4 2009-09-17
+* Use basename for uploaded files by default in copy-to-journal script #1372
diff --git a/datastore/README b/datastore/README
new file mode 100644
index 0000000..b29cebb
--- /dev/null
+++ b/datastore/README
@@ -0,0 +1,41 @@
+About
+=====
+
+Provides activities with a way to store data and metadata, and provides the
+journal with means for querying, including full-text search.
+
+
+Resources
+=========
+
+Code Repository
+ http://git.sugarlabs.org/projects/sugar-datastore/
+
+Bug Tracking
+ http://dev.sugarlabs.org/
+
+Home Page
+ http://wiki.sugarlabs.org/
+
+
+Storage format history
+======================
+
+0 0.82.x
+ Initial format
+
+1 0.84.x
+ Refactoring, start using indexes
+
+2 0.86.0, 0.86.1
+ Add sorting by title and mtime
+
+3 not-mainstream
+ test versioning support
+
+4 0.86.2, 0.88.x
+ version bump to force index rebuild that may have been missed during the
+ migration to version 2 (SL#1787)
+
+5 not-mainstream
+ test versioning support (version bump for SL#1787)
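
The README above summarizes what the datastore offers over D-Bus. As a quick
orientation before the implementation below, here is a minimal client sketch:
it assumes a running datastore-service on the session bus and uses the bus
name, object path and interface defined in src/carquinyol/datastore.py later
in this diff.

    import dbus

    bus = dbus.SessionBus()
    obj = bus.get_object('org.laptop.sugar.DataStore',
                         '/org/laptop/sugar/DataStore')
    datastore = dbus.Interface(obj, 'org.laptop.sugar.DataStore')

    # find() takes a query dict plus the list of metadata properties to
    # return, and yields the matching entries and the total match count.
    entries, count = datastore.find({'limit': 5}, ['uid', 'title'])
    for entry in entries:
        print '%s  %s' % (entry.get('uid', ''), entry.get('title', ''))

Per the QueryParser docstring in indexstore.py, the query dict also accepts
lists (multiple exact matches joined with OR) and 2-tuples (ranges on
value-stored properties), e.g. {'mime_type': ['text/plain', 'text/html'],
'timestamp': (0, 1280000000)}.
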
diff --git a/datastore/autogen.sh b/datastore/autogen.sh
new file mode 100755
index 0000000..1cd5db4
--- /dev/null
+++ b/datastore/autogen.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+export ACLOCAL="aclocal -I m4"
+
+autoreconf -i
+./configure "$@"
diff --git a/datastore/bin/Makefile.am b/datastore/bin/Makefile.am
new file mode 100644
index 0000000..c583cbe
--- /dev/null
+++ b/datastore/bin/Makefile.am
@@ -0,0 +1,6 @@
+bin_SCRIPTS = \
+ datastore-service \
+ copy-from-journal \
+ copy-to-journal
+
+EXTRA_DIST = $(bin_SCRIPTS)
diff --git a/datastore/bin/copy-from-journal b/datastore/bin/copy-from-journal
new file mode 100755
index 0000000..7a10bfd
--- /dev/null
+++ b/datastore/bin/copy-from-journal
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+#
+# Simple script to export a file from the datastore
+# Reinier Heeres, <reinier@heeres.eu>, 2007-12-24
+# Phil Bordelon <phil@thenexusproject.org>
+
+import sys
+import os
+import shutil
+import optparse
+import dbus
+
+if os.path.exists("/tmp/olpc-session-bus"):
+ os.environ["DBUS_SESSION_BUS_ADDRESS"] = "unix:path=/tmp/olpc-session-bus"
+
+from sugar.datastore import datastore
+import sugar.mime
+
+# Limit the number of objects returned on an ambiguous query to this number,
+# for quicker operation.
+RETURN_LIMIT = 2
+
+def build_option_parser():
+
+ usage = "Usage: %prog [-o OBJECT_ID] [-q SEARCH_STR] [-m] OUTFILE"
+ parser = optparse.OptionParser(usage=usage)
+
+ parser.add_option("-o", "--object_id", action="store", dest="object_id",
+ help="Retrieve object with explicit ID OBJECT_ID",
+ metavar="OBJECT_ID", default=None)
+
+ parser.add_option("-q", "--query", action="store", dest="query",
+ help="Full-text-search the metadata for SEARCH_STR",
+ metavar="SEARCH_STR", default=None)
+
+ parser.add_option("-m", "--metadata", action="store_true", dest="show_meta",
+ help="Show all non-preview metadata [default: hide]",
+ default=False)
+
+ return parser
+
+if __name__ == "__main__":
+
+ parser = build_option_parser()
+ options, args = parser.parse_args()
+ if len(args) < 1:
+ parser.print_help()
+ exit(0)
+
+ try:
+ dsentry = None
+
+ # Get object directly if we were given an explicit object ID.
+ if options.object_id is not None:
+ dsentry = datastore.get(options.object_id)
+
+ # Compose the query based on the options provided.
+ if dsentry is None:
+ query = {}
+
+ if options.query is not None:
+ query['query'] = options.query
+
+ # We only want a single file at a time; limit the number of objects
+ # returned to two, as anything more than one means the criteria were
+ # not limited enough.
+ objects, count = datastore.find(query, limit=RETURN_LIMIT, sorting='-mtime')
+ if count > 1:
+ print 'WARNING: %d objects found; retrieving most recent.' % (count)
+ for i in xrange(1, RETURN_LIMIT):
+ objects[i].destroy()
+
+ if count > 0:
+ dsentry = objects[0]
+
+ # If neither an explicit object ID nor a query gave us data, fail.
+ if dsentry is None:
+ print 'ERROR: unable to determine journal object to copy.'
+ parser.print_help()
+ exit(0)
+
+ # Print metadata if that is what the user asked for.
+ if options.show_meta:
+ print 'Metadata:'
+ for key, val in dsentry.metadata.get_dictionary().iteritems():
+ if key != 'preview':
+ print '%20s -> %s' % (key, val)
+
+ # If no file is associated with this object, we can't save it out.
+ if dsentry.get_file_path() == "":
+ print 'ERROR: no file associated with object, just metadata.'
+ dsentry.destroy()
+ exit(0)
+
+ outname = args[0]
+ outroot, outext = os.path.splitext(outname)
+
+ # Do our best to determine the output file extension, based on Sugar's
+ # MIME-type-to-extension mappings.
+ if outext == "":
+ mimetype = dsentry.metadata['mime_type']
+ outext = sugar.mime.get_primary_extension(mimetype)
+ if outext == None:
+ outext = "dsobject"
+ outext = '.' + outext
+
+ # Lastly, actually copy the file out of the datastore and onto the
+ # filesystem.
+ shutil.copyfile(dsentry.get_file_path(), outroot + outext)
+ print '%s -> %s' % (dsentry.get_file_path(), outroot + outext)
+
+ # Cleanup.
+ dsentry.destroy()
+
+ except dbus.DBusException:
+ print 'ERROR: Unable to connect to the datastore.\n'\
+ 'Check that you are running in the same environment as the '\
+ 'datastore service.'
+
+ except Exception, e:
+ print 'ERROR: %s' % (e)
diff --git a/datastore/bin/copy-to-journal b/datastore/bin/copy-to-journal
new file mode 100755
index 0000000..ca6f872
--- /dev/null
+++ b/datastore/bin/copy-to-journal
@@ -0,0 +1,97 @@
+#!/usr/bin/python
+#
+# Simple script to import a file to the datastore
+# Reinier Heeres, <reinier@heeres.eu>, 2007-12-20
+#
+# Modified by Phil Bordelon <phil@thenexusproject.org> 2007-12-20, 2007-12-21
+# to support adding metadata. Note that the MIME-type is required,
+# as otherwise the datastore will not accept the file.
+
+import sys
+import os
+import optparse
+from gettext import gettext as _
+import dbus
+
+if os.path.exists("/tmp/olpc-session-bus"):
+ os.environ["DBUS_SESSION_BUS_ADDRESS"] = "unix:path=/tmp/olpc-session-bus"
+
+from sugar.datastore import datastore
+from sugar import mime
+
+def build_option_parser():
+
+ usage = "Usage: %prog <file> [-m MIMETYPE] [-t TITLE] [-d DESC] [-T tag1 [-T tag2 ...]]"
+ parser = optparse.OptionParser(usage=usage)
+
+ parser.add_option("-t", "--title", action="store", dest="title",
+ help="Set the title of the journal entry to TITLE", metavar="TITLE",
+ default=None)
+ parser.add_option("-d", "--description", action="store",
+ dest="description", metavar="DESC",
+ help="Set the description of the journal entry to DESC",
+ default=None)
+ parser.add_option("-m", "--mimetype", action="store",
+ dest="mimetype", metavar="MIMETYPE",
+ help="Set the file's MIME-type to MIMETYPE",
+ default=None)
+ parser.add_option("-T", "--tag", action="append", dest="tag_list",
+ help="Add tag TAG to the journal entry's tags; this option can be repeated",
+ metavar="TAG")
+ return parser
+
+if __name__ == "__main__":
+
+ parser = build_option_parser()
+ options, args = parser.parse_args()
+ if len(args) < 1:
+ parser.print_help()
+ exit(0)
+
+ fname = args[0]
+ absname = os.path.abspath(fname)
+ if not os.path.exists(absname):
+ print 'Error: File does not exist.'
+ parser.print_help()
+ exit(0)
+
+ try:
+ entry = datastore.create()
+ entry.set_file_path(absname)
+
+ # Set the mimetype to the provided one.
+ if options.mimetype is None:
+ entry.metadata['mime_type'] = mime.get_for_file(absname)
+ else:
+ entry.metadata['mime_type'] = options.mimetype
+
+ # If no title is given, use the filename.
+ if options.title:
+ entry.metadata['title'] = options.title
+ else:
+ entry.metadata['title'] = os.path.basename(fname)
+
+ # Use the description given, otherwise leave it blank.
+ if options.description:
+ entry.metadata['description'] = options.description
+ else:
+ entry.metadata['description'] = _('From: %s') % fname
+
+ # Lastly, if any tags are given, combine them into a single string
+ # and save them.
+ if options.tag_list:
+ tag_string = " ".join(options.tag_list)
+ entry.metadata['tags'] = tag_string
+
+ datastore.write(entry)
+ print 'Created as %s' % (entry.object_id)
+
+ entry.destroy()
+
+ except dbus.DBusException:
+ print 'ERROR: Unable to connect to the datastore.\n'\
+ 'Check that you are running in the same environment as the '\
+ 'datastore service.'
+
+ except Exception, e:
+ print 'ERROR: %s' % (e)
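
Taken together, copy-to-journal and copy-from-journal exercise the whole
sugar.datastore client API. A stripped-down round trip, using only calls that
appear in the two scripts (the path is illustrative and, as in the scripts, a
datastore service must be reachable on the session bus):

    from sugar.datastore import datastore

    # Import: create an entry, attach a file, set the mandatory mime_type.
    entry = datastore.create()
    entry.set_file_path('/tmp/notes.txt')        # must be an existing file
    entry.metadata['mime_type'] = 'text/plain'   # required by the datastore
    entry.metadata['title'] = 'notes.txt'
    datastore.write(entry)
    uid = entry.object_id
    entry.destroy()

    # Export: fetch the entry again and read the journal-managed copy. The
    # caller is responsible for deleting the file get_file_path() hands out.
    copy = datastore.get(uid)
    print copy.metadata['title'], '->', copy.get_file_path()
    copy.destroy()
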
diff --git a/datastore/bin/datastore-service b/datastore/bin/datastore-service
new file mode 100755
index 0000000..06b6517
--- /dev/null
+++ b/datastore/bin/datastore-service
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+import sys, os, signal, logging
+import gobject
+import dbus.service
+import dbus.mainloop.glib
+import dbus.glib
+from carquinyol.datastore import DataStore
+from sugar import logger
+
+# Path handling
+profile = os.environ.get('SUGAR_PROFILE', 'default')
+base_dir = os.path.join(os.path.expanduser('~'), '.sugar', profile)
+log_dir = os.path.join(base_dir, "logs")
+if not os.path.exists(log_dir): os.makedirs(log_dir)
+
+# setup logger
+logger.start('datastore')
+
+# build the datastore
+dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)
+bus = dbus.SessionBus()
+connected = True
+
+ds = DataStore()
+
+# and run it
+mainloop = gobject.MainLoop()
+
+def handle_disconnect():
+ global mainloop
+ mainloop.quit()
+ logging.debug("Datastore disconnected from the bus.")
+
+def handle_shutdown(signum, frame):
+ global mainloop
+ mainloop.quit()
+ raise SystemExit("Shutting down on signal %s" % signum)
+
+bus.set_exit_on_disconnect(False)
+bus.add_signal_receiver(handle_disconnect,
+ signal_name='Disconnected',
+ dbus_interface='org.freedesktop.DBus.Local')
+
+signal.signal(signal.SIGHUP, handle_shutdown)
+signal.signal(signal.SIGTERM, handle_shutdown)
+
+def main():
+ try:
+ mainloop.run()
+ except KeyboardInterrupt:
+ logging.info("DataStore shutdown by user")
+ except:
+ logging.error("Datastore shutdown with error", exc_info=sys.exc_info())
+
+main()
+
+ds.stop()
+
diff --git a/datastore/configure.ac b/datastore/configure.ac
new file mode 100644
index 0000000..6ccea2e
--- /dev/null
+++ b/datastore/configure.ac
@@ -0,0 +1,24 @@
+AC_INIT([sugar-datastore],[0.89.2],[],[sugar-datastore])
+
+AC_PREREQ([2.59])
+
+AC_CONFIG_MACRO_DIR([m4])
+AC_CONFIG_SRCDIR([configure.ac])
+
+AM_INIT_AUTOMAKE([1.9 foreign dist-bzip2 no-dist-gzip])
+
+AM_MAINTAINER_MODE
+
+AC_DISABLE_STATIC
+AC_PROG_LIBTOOL
+
+AM_PATH_PYTHON([2.5])
+AM_CHECK_PYTHON_HEADERS(,[AC_MSG_ERROR(could not find Python headers)])
+
+AC_OUTPUT([
+Makefile
+bin/Makefile
+etc/Makefile
+src/Makefile
+src/carquinyol/Makefile
+])
diff --git a/datastore/etc/.gitignore b/datastore/etc/.gitignore
new file mode 100644
index 0000000..c0ede5e
--- /dev/null
+++ b/datastore/etc/.gitignore
@@ -0,0 +1 @@
+org.laptop.sugar.DataStore.service
diff --git a/datastore/etc/Makefile.am b/datastore/etc/Makefile.am
new file mode 100644
index 0000000..a9b28b1
--- /dev/null
+++ b/datastore/etc/Makefile.am
@@ -0,0 +1,12 @@
+servicedir = $(datadir)/dbus-1/services
+service_in_files = \
+ org.laptop.sugar.DataStore.service.in
+
+service_DATA = $(service_in_files:.service.in=.service)
+
+org.laptop.sugar.DataStore.service: org.laptop.sugar.DataStore.service.in
+ @sed -e "s|\@bindir\@|$(bindir)|" $< > $@
+
+DISTCLEANFILES = $(service_DATA)
+
+EXTRA_DIST = $(service_in_files)
diff --git a/datastore/etc/org.laptop.sugar.DataStore.service.in b/datastore/etc/org.laptop.sugar.DataStore.service.in
new file mode 100644
index 0000000..1b5b270
--- /dev/null
+++ b/datastore/etc/org.laptop.sugar.DataStore.service.in
@@ -0,0 +1,3 @@
+[D-BUS Service]
+Name = org.laptop.sugar.DataStore
+Exec = @bindir@/datastore-service
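
With this file installed under $(datadir)/dbus-1/services, clients never need
to launch datastore-service themselves: the session bus starts the Exec line
on demand the first time the name is requested. A sketch of that activation
path (bus APIs from dbus-python; behaviour depends on the local bus setup):

    import dbus

    bus = dbus.SessionBus()
    print bus.name_has_owner('org.laptop.sugar.DataStore')   # may be False
    bus.start_service_by_name('org.laptop.sugar.DataStore')  # D-Bus activation
    print bus.name_has_owner('org.laptop.sugar.DataStore')   # True once running
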
diff --git a/datastore/m4/python.m4 b/datastore/m4/python.m4
new file mode 100644
index 0000000..e1c5266
--- /dev/null
+++ b/datastore/m4/python.m4
@@ -0,0 +1,62 @@
+## this one is commonly used with AM_PATH_PYTHONDIR ...
+dnl AM_CHECK_PYMOD(MODNAME [,SYMBOL [,ACTION-IF-FOUND [,ACTION-IF-NOT-FOUND]]])
+dnl Check if a module containing a given symbol is visible to python.
+AC_DEFUN([AM_CHECK_PYMOD],
+[AC_REQUIRE([AM_PATH_PYTHON])
+py_mod_var=`echo $1['_']$2 | sed 'y%./+-%__p_%'`
+AC_MSG_CHECKING(for ifelse([$2],[],,[$2 in ])python module $1)
+AC_CACHE_VAL(py_cv_mod_$py_mod_var, [
+ifelse([$2],[], [prog="
+import sys
+try:
+ import $1
+except ImportError:
+ sys.exit(1)
+except:
+ sys.exit(0)
+sys.exit(0)"], [prog="
+import $1
+$1.$2"])
+if $PYTHON -c "$prog" 1>&AC_FD_CC 2>&AC_FD_CC
+ then
+ eval "py_cv_mod_$py_mod_var=yes"
+ else
+ eval "py_cv_mod_$py_mod_var=no"
+ fi
+])
+py_val=`eval "echo \`echo '$py_cv_mod_'$py_mod_var\`"`
+if test "x$py_val" != xno; then
+ AC_MSG_RESULT(yes)
+ ifelse([$3], [],, [$3
+])dnl
+else
+ AC_MSG_RESULT(no)
+ ifelse([$4], [],, [$4
+])dnl
+fi
+])
+
+dnl a macro to check for ability to create python extensions
+dnl AM_CHECK_PYTHON_HEADERS([ACTION-IF-POSSIBLE], [ACTION-IF-NOT-POSSIBLE])
+dnl function also defines PYTHON_INCLUDES
+AC_DEFUN([AM_CHECK_PYTHON_HEADERS],
+[AC_REQUIRE([AM_PATH_PYTHON])
+AC_MSG_CHECKING(for headers required to compile python extensions)
+dnl deduce PYTHON_INCLUDES
+py_prefix=`$PYTHON -c "import sys; print sys.prefix"`
+py_exec_prefix=`$PYTHON -c "import sys; print sys.exec_prefix"`
+PYTHON_INCLUDES="-I${py_prefix}/include/python${PYTHON_VERSION}"
+if test "$py_prefix" != "$py_exec_prefix"; then
+ PYTHON_INCLUDES="$PYTHON_INCLUDES -I${py_exec_prefix}/include/python${PYTHON_VERSION}"
+fi
+AC_SUBST(PYTHON_INCLUDES)
+dnl check if the headers exist:
+save_CPPFLAGS="$CPPFLAGS"
+CPPFLAGS="$CPPFLAGS $PYTHON_INCLUDES"
+AC_TRY_CPP([#include <Python.h>],dnl
+[AC_MSG_RESULT(found)
+$1],dnl
+[AC_MSG_RESULT(not found)
+$2])
+CPPFLAGS="$save_CPPFLAGS"
+])
diff --git a/datastore/maint-helper.py b/datastore/maint-helper.py
new file mode 100755
index 0000000..5ffd7e0
--- /dev/null
+++ b/datastore/maint-helper.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+
+# Copyright (C) 2007, Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import os
+import sys
+import re
+import datetime
+import subprocess
+
+
+SOURCE_EXTS = ['.py', '.c', '.h', '.cpp']
+COPYRIGHT = 'Copyright (C) '
+
+
+def is_source(path):
+ for ext in SOURCE_EXTS:
+ if path.endswith(ext):
+ return True
+
+
+def get_name_and_version():
+ f = open('configure.ac', 'r')
+ config = f.read()
+ f.close()
+
+ exp = 'AC_INIT\(\[[^\]]+\],\[([^\]]+)\],\[\],\[([^\]]+)\]'
+ match = re.search(exp, config)
+ if not match:
+ print 'Cannot find the package name and version.'
+ sys.exit(0)
+
+ return (match.group(2), match.group(1))
+
+
+def cmd_help():
+ print 'Usage: \n\
+maint-helper.py build-snapshot - build a source snapshot \n\
+maint-helper.py fix-copyright [path] - fix the copyright year \n\
+maint-helper.py check-licenses - check licenses in the source'
+
+
+def cmd_build_snapshot():
+ name, version = get_name_and_version()
+
+ print 'Update git...'
+
+ retcode = subprocess.call(['git', 'pull'])
+ if retcode:
+ print 'ERROR - cannot pull from git'
+
+ cmd = 'git-show-ref --hash=10 refs/heads/master'
+ alphatag = os.popen(cmd).readline().strip()
+
+ tarball = '%s-%s-git%s.tar.bz2' % (name, version, alphatag)
+
+ print 'Build %s...' % tarball
+
+ os.spawnlp(os.P_WAIT, 'make', 'make', 'distcheck')
+
+ os.rename('%s-%s.tar.bz2' % (name, version), tarball)
+
+ print 'Update NEWS.sugar...'
+
+ if 'SUGAR_NEWS' in os.environ:
+ sugar_news_path = os.environ['SUGAR_NEWS']
+ if os.path.isfile(sugar_news_path):
+ f = open(sugar_news_path, 'r')
+ sugar_news = f.read()
+ f.close()
+ else:
+ sugar_news = ''
+
+ name, version = get_name_and_version()
+ sugar_news += '%s - %s - %s\n\n' % (name, version, alphatag)
+
+ f = open('NEWS', 'r')
+ for line in f.readlines():
+ if len(line.strip()) > 0:
+ sugar_news += line
+ else:
+ break
+ f.close()
+
+ f = open(sugar_news_path, 'w')
+ f.write(sugar_news)
+ f.close()
+
+ print 'Update NEWS...'
+
+ f = open('NEWS', 'r')
+ news = f.read()
+ f.close()
+
+ news = 'Snapshot %s\n\n' % alphatag + news
+
+ f = open('NEWS', 'w')
+ f.write(news)
+ f.close()
+
+ print 'Committing to git...'
+
+ changelog = 'Snapshot %s.' % alphatag
+ retcode = subprocess.call(['git', 'commit', '-a', '-m', changelog])
+ if retcode:
+ print 'ERROR - cannot commit to git'
+
+ retcode = subprocess.call(['git', 'push'])
+ if retcode:
+ print 'ERROR - cannot push to git'
+
+ print 'Done.'
+
+
+def check_licenses(path, license, missing):
+ matchers = {'LGPL': 'GNU Lesser General Public',
+ 'GPL': 'GNU General Public License'}
+
+ license_file = os.path.join(path, '.license')
+ if os.path.isfile(license_file):
+ f = open(license_file, 'r')
+ license = f.readline().strip()
+ f.close()
+
+ for item in os.listdir(path):
+ full_path = os.path.join(path, item)
+
+ if os.path.isdir(full_path):
+ check_licenses(full_path, license, missing)
+ else:
+ check_source = is_source(item)
+
+ # Special cases.
+ if item.find('marshal') > 0 or \
+ item.startswith('egg') > 0:
+ check_source = False
+
+ if check_source:
+ f = open(full_path, 'r')
+ source = f.read()
+ f.close()
+
+ miss_license = True
+ if source.find(matchers[license]) > 0:
+ miss_license = False
+
+ # Special cases.
+ if source.find('THIS FILE IS GENERATED') > 0:
+ miss_license = False
+
+ if miss_license:
+ if license not in missing:
+ missing[license] = []
+ missing[license].append(full_path)
+
+
+def cmd_check_licenses():
+ missing = {}
+ check_licenses(os.getcwd(), 'GPL', missing)
+
+ for item in missing.keys():
+ print '%s:\n' % item
+ for path in missing[item]:
+ print path
+ print '\n'
+
+
+def fix_copyright(path):
+ for item in os.listdir(path):
+ full_path = os.path.join(path, item)
+
+ if os.path.isdir(full_path):
+ fix_copyright(full_path)
+ elif is_source(item):
+ f = open(full_path, 'r')
+ source = f.read()
+ f.close()
+
+ year_start = -1
+ year_end = -1
+
+ i1 = source.find(COPYRIGHT)
+ if i1 != -1:
+ i1 += len(COPYRIGHT)
+ i2 = i1 + source[i1:].find(' ')
+ if i1 > 0:
+ try:
+ year_start = int(source[i1:i1 + 4])
+ year_end = int(source[i1 + 6: i1 + 10])
+ except ValueError:
+ pass
+
+ if year_start > 0 and year_end < 0:
+ year_end = year_start
+
+ year = datetime.date.today().year
+ if year_end < year:
+ result = '%s%d-%d%s' % (source[:i1], year_start,
+ year, source[i2:])
+ f = open(full_path, 'w')
+ f.write(result)
+ f.close()
+
+
+def cmd_fix_copyright(path):
+ fix_copyright(path)
+
+
+if len(sys.argv) < 2:
+ cmd_help()
+elif sys.argv[1] == 'build-snapshot':
+ cmd_build_snapshot()
+elif sys.argv[1] == 'check-licenses':
+ cmd_check_licenses()
+elif sys.argv[1] == 'fix-copyright' and len(sys.argv) > 2:
+ cmd_fix_copyright(sys.argv[2])
diff --git a/datastore/src/Makefile.am b/datastore/src/Makefile.am
new file mode 100644
index 0000000..434face
--- /dev/null
+++ b/datastore/src/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = carquinyol
diff --git a/datastore/src/carquinyol/Makefile.am b/datastore/src/carquinyol/Makefile.am
new file mode 100644
index 0000000..7c56174
--- /dev/null
+++ b/datastore/src/carquinyol/Makefile.am
@@ -0,0 +1,23 @@
+datastoredir = $(pythondir)/carquinyol
+datastore_PYTHON = \
+ __init__.py \
+ datastore.py \
+ filestore.py \
+ indexstore.py \
+ layoutmanager.py \
+ metadatastore.py \
+ migration.py \
+ optimizer.py
+
+AM_CPPFLAGS = \
+ $(WARN_CFLAGS) \
+ $(EXT_CFLAGS) \
+ $(PYTHON_INCLUDES)
+
+AM_LDFLAGS = -module -avoid-version
+
+pkgpyexecdir = $(pythondir)/carquinyol
+pkgpyexec_LTLIBRARIES = metadatareader.la
+
+metadatareader_la_SOURCES = \
+ metadatareader.c
diff --git a/datastore/src/carquinyol/__init__.py b/datastore/src/carquinyol/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/datastore/src/carquinyol/__init__.py
diff --git a/datastore/src/carquinyol/datastore.py b/datastore/src/carquinyol/datastore.py
new file mode 100644
index 0000000..82a6207
--- /dev/null
+++ b/datastore/src/carquinyol/datastore.py
@@ -0,0 +1,354 @@
+# Copyright (C) 2008, One Laptop Per Child
+# Based on code Copyright (C) 2007, ObjectRealms, LLC
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import logging
+import uuid
+import time
+import os
+import traceback
+
+import dbus
+import dbus.service
+import gobject
+
+from sugar import mime
+
+from carquinyol import layoutmanager
+from carquinyol import migration
+from carquinyol.layoutmanager import MAX_QUERY_LIMIT
+from carquinyol.metadatastore import MetadataStore
+from carquinyol.indexstore import IndexStore
+from carquinyol.filestore import FileStore
+from carquinyol.optimizer import Optimizer
+
+# the name used by the logger
+DS_LOG_CHANNEL = 'org.laptop.sugar.DataStore'
+
+DS_SERVICE = "org.laptop.sugar.DataStore"
+DS_DBUS_INTERFACE = "org.laptop.sugar.DataStore"
+DS_OBJECT_PATH = "/org/laptop/sugar/DataStore"
+
+logger = logging.getLogger(DS_LOG_CHANNEL)
+
+
+class DataStore(dbus.service.Object):
+ """D-Bus API and logic for connecting all the other components.
+ """
+
+ def __init__(self, **options):
+ bus_name = dbus.service.BusName(DS_SERVICE,
+ bus=dbus.SessionBus(),
+ replace_existing=False,
+ allow_replacement=False)
+ dbus.service.Object.__init__(self, bus_name, DS_OBJECT_PATH)
+
+ migrated = self._migrate()
+
+ self._metadata_store = MetadataStore()
+ self._file_store = FileStore()
+ self._optimizer = Optimizer(self._file_store, self._metadata_store)
+ self._index_store = IndexStore()
+ self._index_updating = False
+
+ if migrated:
+ self._rebuild_index()
+ return
+
+ try:
+ self._index_store.open_index()
+ except Exception:
+ logging.exception('Failed to open index, will rebuild')
+ self._rebuild_index()
+ return
+
+ if not self._index_store.index_updated:
+ logging.debug('Index is not up-to-date, will update')
+ self._update_index()
+
+ def _migrate(self):
+ """Check version of data store on disk and migrate if necessary.
+
+ Returns True if migration was done and an index rebuild is required,
+ False otherwise.
+ """
+ layout_manager = layoutmanager.get_instance()
+ old_version = layout_manager.get_version()
+ if old_version == layoutmanager.CURRENT_LAYOUT_VERSION:
+ return False
+
+ if old_version == 0:
+ migration.migrate_from_0()
+
+ layout_manager.set_version(layoutmanager.CURRENT_LAYOUT_VERSION)
+ return True
+
+ def _rebuild_index(self):
+ """Remove and recreate index."""
+ self._index_store.close_index()
+ self._index_store.remove_index()
+ self._index_store.open_index()
+ self._update_index()
+
+ def _update_index(self):
+ """Find entries that are not yet in the index and add them."""
+ uids = layoutmanager.get_instance().find_all()
+ logging.debug('Going to update the index with object_ids %r',
+ uids)
+ self._index_updating = True
+ gobject.idle_add(lambda: self.__update_index_cb(uids),
+ priority=gobject.PRIORITY_LOW)
+
+ def __update_index_cb(self, uids):
+ if uids:
+ uid = uids.pop()
+
+ logging.debug('Updating entry %r in index. %d to go.', uid,
+ len(uids))
+
+ if not self._index_store.contains(uid):
+ try:
+ props = self._metadata_store.retrieve(uid)
+ self._index_store.store(uid, props)
+ except Exception:
+ logging.exception('Error processing %r', uid)
+
+ if not uids:
+ self._index_store.flush()
+ self._index_updating = False
+ logging.debug('Finished updating index.')
+ return False
+ else:
+ return True
+
+ def _create_completion_cb(self, async_cb, async_err_cb, uid, exc=None):
+ logger.debug('_create_completion_cb(%r, %r, %r, %r)', async_cb,
+ async_err_cb, uid, exc)
+ if exc is not None:
+ async_err_cb(exc)
+ return
+
+ self.Created(uid)
+ self._optimizer.optimize(uid)
+ logger.debug('created %s', uid)
+ async_cb(uid)
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature='a{sv}sb',
+ out_signature='s',
+ async_callbacks=('async_cb', 'async_err_cb'),
+ byte_arrays=True)
+ def create(self, props, file_path, transfer_ownership,
+ async_cb, async_err_cb):
+ uid = str(uuid.uuid4())
+ logging.debug('datastore.create %r', uid)
+
+ if not props.get('timestamp', ''):
+ props['timestamp'] = int(time.time())
+
+ self._metadata_store.store(uid, props)
+ self._index_store.store(uid, props)
+ self._file_store.store(uid, file_path, transfer_ownership,
+ lambda *args: self._create_completion_cb(async_cb,
+ async_err_cb,
+ uid,
+ *args))
+
+ @dbus.service.signal(DS_DBUS_INTERFACE, signature="s")
+ def Created(self, uid):
+ pass
+
+ def _update_completion_cb(self, async_cb, async_err_cb, uid, exc=None):
+ logger.debug('_update_completion_cb() called with %r / %r, exc %r',
+ async_cb, async_err_cb, exc)
+ if exc is not None:
+ async_err_cb(exc)
+ return
+
+ self.Updated(uid)
+ self._optimizer.optimize(uid)
+ logger.debug('updated %s', uid)
+ async_cb()
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature='sa{sv}sb',
+ out_signature='',
+ async_callbacks=('async_cb', 'async_err_cb'),
+ byte_arrays=True)
+ def update(self, uid, props, file_path, transfer_ownership,
+ async_cb, async_err_cb):
+ logging.debug('datastore.update %r', uid)
+
+ if not props.get('timestamp', ''):
+ props['timestamp'] = int(time.time())
+
+ self._metadata_store.store(uid, props)
+ self._index_store.store(uid, props)
+
+ if os.path.exists(self._file_store.get_file_path(uid)) and \
+ (not file_path or os.path.exists(file_path)):
+ self._optimizer.remove(uid)
+ self._file_store.store(uid, file_path, transfer_ownership,
+ lambda *args: self._update_completion_cb(async_cb,
+ async_err_cb,
+ uid,
+ *args))
+
+ @dbus.service.signal(DS_DBUS_INTERFACE, signature="s")
+ def Updated(self, uid):
+ pass
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature='a{sv}as',
+ out_signature='aa{sv}u')
+ def find(self, query, properties):
+ logging.debug('datastore.find %r', query)
+ t = time.time()
+
+ if not self._index_updating:
+ try:
+ uids, count = self._index_store.find(query)
+ except Exception:
+ logging.exception('Failed to query index, will rebuild')
+ self._rebuild_index()
+
+ if self._index_updating:
+ logging.warning('Index updating, returning all entries')
+ return self._find_all(query, properties)
+
+ entries = []
+ for uid in uids:
+ entry_path = layoutmanager.get_instance().get_entry_path(uid)
+ if not os.path.exists(entry_path):
+ logging.warning(
+ 'Inconsistency detected, returning all entries')
+ self._rebuild_index()
+ return self._find_all(query, properties)
+
+ metadata = self._metadata_store.retrieve(uid, properties)
+ entries.append(metadata)
+
+ logger.debug('find(): %r', time.time() - t)
+
+ return entries, count
+
+ def _find_all(self, query, properties):
+ uids = layoutmanager.get_instance().find_all()
+ count = len(uids)
+
+ offset = query.get('offset', 0)
+ limit = query.get('limit', MAX_QUERY_LIMIT)
+ uids = uids[offset:offset + limit]
+
+ entries = []
+ for uid in uids:
+ metadata = self._metadata_store.retrieve(uid, properties)
+ entries.append(metadata)
+
+ return entries, count
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature='s',
+ out_signature='s',
+ sender_keyword='sender')
+ def get_filename(self, uid, sender=None):
+ logging.debug('datastore.get_filename %r', uid)
+ user_id = dbus.Bus().get_unix_user(sender)
+ extension = self._get_extension(uid)
+ return self._file_store.retrieve(uid, user_id, extension)
+
+ def _get_extension(self, uid):
+ mime_type = self._metadata_store.get_property(uid, 'mime_type')
+ if mime_type is None or not mime_type:
+ return ''
+ return mime.get_primary_extension(mime_type)
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature='s',
+ out_signature='a{sv}')
+ def get_properties(self, uid):
+ logging.debug('datastore.get_properties %r', uid)
+ metadata = self._metadata_store.retrieve(uid)
+ return metadata
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature='sa{sv}',
+ out_signature='as')
+ def get_uniquevaluesfor(self, propertyname, query=None):
+ if propertyname != 'activity':
+ raise ValueError("Only 'activity' is a supported property name")
+ if query:
+ raise ValueError('The query parameter is not supported')
+ if not self._index_updating:
+ return self._index_store.get_activities()
+ else:
+ logging.warning('Index updating, returning an empty list')
+ return []
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature='s',
+ out_signature='')
+ def delete(self, uid):
+ self._optimizer.remove(uid)
+
+ self._index_store.delete(uid)
+ self._file_store.delete(uid)
+ self._metadata_store.delete(uid)
+
+ entry_path = layoutmanager.get_instance().get_entry_path(uid)
+ os.removedirs(entry_path)
+
+ self.Deleted(uid)
+ logger.debug('deleted %s', uid)
+
+ @dbus.service.signal(DS_DBUS_INTERFACE, signature="s")
+ def Deleted(self, uid):
+ pass
+
+ def stop(self):
+ """shutdown the service"""
+ self._index_store.close_index()
+ self.Stopped()
+
+ @dbus.service.signal(DS_DBUS_INTERFACE)
+ def Stopped(self):
+ pass
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature="sa{sv}",
+ out_signature='s')
+ def mount(self, uri, options=None):
+ return ''
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature="",
+ out_signature="aa{sv}")
+ def mounts(self):
+ return [{'id': 1}]
+
+ @dbus.service.method(DS_DBUS_INTERFACE,
+ in_signature="s",
+ out_signature="")
+ def unmount(self, mountpoint_id):
+ pass
+
+ @dbus.service.signal(DS_DBUS_INTERFACE, signature="a{sv}")
+ def Mounted(self, descriptor):
+ pass
+
+ @dbus.service.signal(DS_DBUS_INTERFACE, signature="a{sv}")
+ def Unmounted(self, descriptor):
+ pass
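
create() and update() are asynchronous on the service side (note the
async_callbacks declaration), so a client should pair them with dbus-python's
reply_handler/error_handler keywords rather than block. A minimal sketch,
assuming the file path exists and a datastore service is on the session bus:

    import time

    import dbus
    import dbus.mainloop.glib
    import gobject

    # An attached main loop is required for asynchronous dbus-python calls.
    dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)

    bus = dbus.SessionBus()
    datastore = dbus.Interface(
        bus.get_object('org.laptop.sugar.DataStore',
                       '/org/laptop/sugar/DataStore'),
        'org.laptop.sugar.DataStore')

    loop = gobject.MainLoop()

    def created_cb(uid):
        print 'created', uid     # the Created signal is emitted as well
        loop.quit()

    def error_cb(error):
        print 'create failed:', error
        loop.quit()

    props = {'title': 'notes', 'mime_type': 'text/plain',
             'timestamp': int(time.time())}
    # Arguments: properties, file_path, transfer_ownership. With
    # transfer_ownership=False the service copies the file and the
    # original stays in place.
    datastore.create(props, '/tmp/notes.txt', False,
                     reply_handler=created_cb, error_handler=error_cb)
    loop.run()
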
diff --git a/datastore/src/carquinyol/filestore.py b/datastore/src/carquinyol/filestore.py
new file mode 100644
index 0000000..9724397
--- /dev/null
+++ b/datastore/src/carquinyol/filestore.py
@@ -0,0 +1,222 @@
+# Copyright (C) 2008, One Laptop Per Child
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import os
+import errno
+import logging
+import tempfile
+
+import gobject
+
+from carquinyol import layoutmanager
+
+
+class FileStore(object):
+ """Handle the storage of one file per entry.
+ """
+
+ # TODO: add protection against store and retrieve operations on entries
+ # that are being processed async.
+
+ def store(self, uid, file_path, transfer_ownership, completion_cb):
+ """Store a file for a given entry.
+
+ """
+ dir_path = layoutmanager.get_instance().get_entry_path(uid)
+ if not os.path.exists(dir_path):
+ os.makedirs(dir_path)
+
+ destination_path = layoutmanager.get_instance().get_data_path(uid)
+ if file_path:
+ if not os.path.isfile(file_path):
+ raise ValueError('No file at %r' % file_path)
+ if transfer_ownership:
+ try:
+ logging.debug('FileStore moving from %r to %r', file_path,
+ destination_path)
+ os.rename(file_path, destination_path)
+ completion_cb()
+ except OSError, e:
+ if e.errno == errno.EXDEV:
+ self._async_copy(file_path, destination_path,
+ completion_cb, unlink_src=True)
+ else:
+ raise
+ else:
+ self._async_copy(file_path, destination_path, completion_cb,
+ unlink_src=False)
+ """
+ TODO: How can we support deleting the file of an entry?
+ elif not file_path and os.path.exists(destination_path):
+ logging.debug('FileStore: deleting %r' % destination_path)
+ os.remove(destination_path)
+ completion_cb()
+ """
+ else:
+ logging.debug('FileStore: Nothing to do')
+ completion_cb()
+
+ def _async_copy(self, file_path, destination_path, completion_cb,
+ unlink_src):
+ """Start copying a file asynchronously.
+
+ """
+ logging.debug('FileStore copying from %r to %r', file_path,
+ destination_path)
+ async_copy = AsyncCopy(file_path, destination_path, completion_cb,
+ unlink_src)
+ async_copy.start()
+
+ def retrieve(self, uid, user_id, extension):
+ """Place the file associated to a given entry into a directory
+ where the user can read it. The caller is reponsible for
+ deleting this file.
+
+ """
+ file_path = layoutmanager.get_instance().get_data_path(uid)
+ if not os.path.exists(file_path):
+ logging.debug("Entry %r doesn't have any file", uid)
+ return ''
+
+ use_instance_dir = os.path.exists('/etc/olpc-security') and \
+ os.getuid() != user_id
+ if use_instance_dir:
+ if not user_id:
+ raise ValueError("Couldn't determine the current user uid.")
+ destination_dir = os.path.join(os.environ['HOME'], 'isolation',
+ '1', 'uid_to_instance_dir', str(user_id))
+ else:
+ profile = os.environ.get('SUGAR_PROFILE', 'default')
+ destination_dir = os.path.join(os.path.expanduser('~'), '.sugar',
+ profile, 'data')
+ if not os.path.exists(destination_dir):
+ os.makedirs(destination_dir)
+
+ if extension is None:
+ extension = ''
+ elif extension:
+ extension = '.' + extension
+
+ fd, destination_path = tempfile.mkstemp(prefix=uid + '_',
+ suffix=extension, dir=destination_dir)
+ os.close(fd)
+ os.unlink(destination_path)
+
+ # Try to hard link from the original file to the targetpath. This can
+ # fail if the file is in a different filesystem. Do a symlink instead.
+ try:
+ os.link(file_path, destination_path)
+ except OSError, e:
+ if e.errno == errno.EXDEV:
+ os.symlink(file_path, destination_path)
+ else:
+ raise
+
+ # Try to make the original file readable. This can fail if the file is
+ # in a FAT filesystem.
+ try:
+ os.chmod(file_path, 0604)
+ except OSError, e:
+ if e.errno != errno.EPERM:
+ raise
+
+ return destination_path
+
+ def get_file_path(self, uid):
+ return layoutmanager.get_instance().get_data_path(uid)
+
+ def delete(self, uid):
+ """Remove the file associated to a given entry.
+
+ """
+ file_path = layoutmanager.get_instance().get_data_path(uid)
+ if os.path.exists(file_path):
+ os.remove(file_path)
+
+ def hard_link_entry(self, new_uid, existing_uid):
+ existing_file = layoutmanager.get_instance().get_data_path(existing_uid)
+ new_file = layoutmanager.get_instance().get_data_path(new_uid)
+
+ logging.debug('removing %r', new_file)
+ os.remove(new_file)
+
+ logging.debug('hard linking %r -> %r', new_file, existing_file)
+ os.link(existing_file, new_file)
+
+
+class AsyncCopy(object):
+ """Copy a file in chunks in the idle loop.
+
+ """
+ CHUNK_SIZE = 65536
+
+ def __init__(self, src, dest, completion, unlink_src=False):
+ self.src = src
+ self.dest = dest
+ self.completion = completion
+ self._unlink_src = unlink_src
+ self.src_fp = -1
+ self.dest_fp = -1
+ self.written = 0
+ self.size = 0
+
+ def _cleanup(self):
+ os.close(self.src_fp)
+ os.close(self.dest_fp)
+
+ def _copy_block(self, user_data=None):
+ try:
+ data = os.read(self.src_fp, AsyncCopy.CHUNK_SIZE)
+ count = os.write(self.dest_fp, data)
+ self.written += len(data)
+
+ # error writing data to file?
+ if count < len(data):
+ logging.error('AC: Error writing %s -> %s: wrote less than '
+ 'expected', self.src, self.dest)
+ self._complete(RuntimeError(
+ 'Error writing data to destination file'))
+ return False
+
+ # FIXME: emit progress here
+
+ # done?
+ if len(data) < AsyncCopy.CHUNK_SIZE:
+ self._complete(None)
+ return False
+ except Exception, err:
+ logging.error('AC: Error copying %s -> %s: %r', self.src,
+ self.dest, err)
+ self._complete(err)
+ return False
+
+ return True
+
+ def _complete(self, *args):
+ self._cleanup()
+ if self._unlink_src:
+ os.unlink(self.src)
+ self.completion(*args)
+
+ def start(self):
+ self.src_fp = os.open(self.src, os.O_RDONLY)
+ self.dest_fp = os.open(self.dest, os.O_RDWR | os.O_TRUNC | os.O_CREAT,
+ 0644)
+
+ stat = os.fstat(self.src_fp)
+ self.size = stat[6]
+
+ gobject.idle_add(self._copy_block)
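
AsyncCopy is what keeps large stores from blocking the service: the copy is
chunked through gobject.idle_add in 64 KiB reads. It can also be driven
standalone; a sketch with illustrative paths (the source file must exist):

    import gobject
    from carquinyol.filestore import AsyncCopy

    loop = gobject.MainLoop()

    def done_cb(error=None):
        # AsyncCopy reports None on success or the exception on failure.
        if error is not None:
            print 'copy failed:', error
        loop.quit()

    AsyncCopy('/tmp/big-src.bin', '/tmp/big-dest.bin', done_cb).start()
    loop.run()
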
diff --git a/datastore/src/carquinyol/indexstore.py b/datastore/src/carquinyol/indexstore.py
new file mode 100644
index 0000000..62b843b
--- /dev/null
+++ b/datastore/src/carquinyol/indexstore.py
@@ -0,0 +1,353 @@
+# Copyright (C) 2008, One Laptop Per Child
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import logging
+import os
+import sys
+
+import gobject
+import xapian
+from xapian import WritableDatabase, Document, Enquire, Query
+
+from carquinyol import layoutmanager
+from carquinyol.layoutmanager import MAX_QUERY_LIMIT
+
+_VALUE_UID = 0
+_VALUE_TIMESTAMP = 1
+_VALUE_TITLE = 2
+
+_PREFIX_NONE = 'N'
+_PREFIX_FULL_VALUE = 'F'
+_PREFIX_UID = 'Q'
+_PREFIX_ACTIVITY = 'A'
+_PREFIX_ACTIVITY_ID = 'I'
+_PREFIX_MIME_TYPE = 'M'
+_PREFIX_KEEP = 'K'
+
+# Force a flush every _n_ changes to the db
+_FLUSH_THRESHOLD = 20
+
+# Force a flush after _n_ seconds since the last change to the db
+_FLUSH_TIMEOUT = 5
+
+_PROPERTIES_NOT_TO_INDEX = ['timestamp', 'preview']
+
+_MAX_RESULTS = int(2 ** 31 - 1)
+
+_QUERY_TERM_MAP = {
+ 'uid': _PREFIX_UID,
+ 'activity': _PREFIX_ACTIVITY,
+ 'activity_id': _PREFIX_ACTIVITY_ID,
+ 'mime_type': _PREFIX_MIME_TYPE,
+ 'keep': _PREFIX_KEEP,
+}
+
+_QUERY_VALUE_MAP = {
+ 'timestamp': {'number': _VALUE_TIMESTAMP, 'type': float},
+}
+
+
+class TermGenerator (xapian.TermGenerator):
+
+ def index_document(self, document, properties):
+ document.add_value(_VALUE_TIMESTAMP,
+ xapian.sortable_serialise(float(properties['timestamp'])))
+ document.add_value(_VALUE_TITLE, properties.get('title', '').strip())
+
+ self.set_document(document)
+
+ properties = dict(properties)
+ self._index_known(document, properties)
+ self._index_unknown(document, properties)
+
+ def _index_known(self, document, properties):
+ for name, prefix in _QUERY_TERM_MAP.items():
+ if (name not in properties):
+ continue
+
+ self._index_property(document, name, properties.pop(name), prefix)
+
+ def _index_unknown(self, document, properties):
+ for name, value in properties.items():
+ self._index_property(document, name, value)
+
+ def _index_property(self, doc, name, value, prefix=''):
+ if name in _PROPERTIES_NOT_TO_INDEX or not value:
+ return
+
+ if isinstance(value, unicode):
+ value = value.encode('utf-8')
+ elif not isinstance(value, basestring):
+ value = str(value)
+
+ # We need to add the full value (i.e. not split into words) so
+ # we can enumerate unique values. It also simplifies setting up
+ # dictionary-based queries.
+ if prefix:
+ doc.add_term(_PREFIX_FULL_VALUE + prefix + value)
+
+ self.index_text(value, 1, prefix or _PREFIX_NONE)
+ self.increase_termpos()
+
+
+class QueryParser(xapian.QueryParser):
+    """QueryParser that understands dictionaries and Xapian query strings.
+
+    The dictionary maps metadata names to either a basic type (exact
+    match), a 2-tuple (range; only valid for value-stored metadata) or a
+    list (multiple exact matches joined with OR). An empty dictionary
+    matches everything. Queries from different keys (i.e. different
+    metadata names) are joined with AND.
+    """
+
+ def __init__(self):
+ xapian.QueryParser.__init__(self)
+
+ for name, prefix in _QUERY_TERM_MAP.items():
+ self.add_prefix(name, prefix)
+ self.add_prefix('', prefix)
+
+ self.add_prefix('', _PREFIX_NONE)
+
+ def _parse_query_term(self, name, prefix, value):
+ if isinstance(value, list):
+ subqueries = [self._parse_query_term(name, prefix, word)
+ for word in value]
+ return Query(Query.OP_OR, subqueries)
+
+ elif prefix:
+ return Query(_PREFIX_FULL_VALUE + prefix + str(value))
+ else:
+ return Query(_PREFIX_NONE + str(value))
+
+ def _parse_query_value_range(self, name, info, value):
+ if len(value) != 2:
+ raise TypeError(
+ 'Only tuples of size 2 have a defined meaning. '
+ 'Did you mean to pass a list instead?')
+
+ start, end = value
+ return Query(Query.OP_VALUE_RANGE, info['number'],
+ self._convert_value(info, start), self._convert_value(info, end))
+
+ def _convert_value(self, info, value):
+ if info['type'] in (float, int, long):
+ return xapian.sortable_serialise(info['type'](value))
+
+ return str(info['type'](value))
+
+ def _parse_query_value(self, name, info, value):
+ if isinstance(value, list):
+ subqueries = [self._parse_query_value(name, info, word)
+ for word in value]
+ return Query(Query.OP_OR, subqueries)
+
+ elif isinstance(value, tuple):
+ return self._parse_query_value_range(name, info, value)
+
+ elif isinstance(value, dict):
+ # compatibility option for timestamp: {'start': 0, 'end': 1}
+ start = value.get('start', 0)
+ end = value.get('end', sys.maxint)
+ return self._parse_query_value_range(name, info, (start, end))
+
+ else:
+ return self._parse_query_value_range(name, info, (value, value))
+
+ def _parse_query_xapian(self, query_str):
+ try:
+ return xapian.QueryParser.parse_query(
+ self, query_str,
+ QueryParser.FLAG_PHRASE |
+ QueryParser.FLAG_BOOLEAN |
+ QueryParser.FLAG_LOVEHATE |
+ QueryParser.FLAG_WILDCARD,
+ '')
+
+ except xapian.QueryParserError, exception:
+ logging.warning('Invalid query string: ' + exception.get_msg())
+ return Query()
+
+ def parse_query(self, query_dict, query_string):
+ logging.debug('parse_query %r %r', query_dict, query_string)
+ queries = []
+ query_dict = dict(query_dict)
+
+ if query_string is not None:
+ queries.append(self._parse_query_xapian(str(query_string)))
+
+ for name, value in query_dict.items():
+ if name in _QUERY_TERM_MAP:
+ queries.append(self._parse_query_term(name,
+ _QUERY_TERM_MAP[name], value))
+ elif name in _QUERY_VALUE_MAP:
+ queries.append(self._parse_query_value(name,
+ _QUERY_VALUE_MAP[name], value))
+ else:
+ logging.warning('Unknown term: %r=%r', name, value)
+
+ if not queries:
+ queries.append(Query(''))
+
+ logging.debug('queries: %r', [str(q) for q in queries])
+ return Query(Query.OP_AND, queries)
+
+
+class IndexStore(object):
+ """Index metadata and provide rich query facilities on it.
+ """
+
+ def __init__(self):
+ self._database = None
+ self._flush_timeout = None
+ self._pending_writes = 0
+ self._index_updated_path = os.path.join(
+ layoutmanager.get_instance().get_root_path(), 'index_updated')
+
+ def open_index(self):
+ index_path = layoutmanager.get_instance().get_index_path()
+ self._database = WritableDatabase(index_path, xapian.DB_CREATE_OR_OPEN)
+
+ def close_index(self):
+ """Close index database if it is open."""
+ if not self._database:
+ return
+
+ self._flush(True)
+ self._database = None
+
+ def remove_index(self):
+ index_path = layoutmanager.get_instance().get_index_path()
+ if not os.path.exists(index_path):
+ return
+ for f in os.listdir(index_path):
+ os.remove(os.path.join(index_path, f))
+
+ def contains(self, uid):
+        postings = self._database.postlist(
+            _PREFIX_FULL_VALUE + _PREFIX_UID + uid)
+        try:
+            postings.next()
+        except StopIteration:
+            return False
+        return True
+
+ def store(self, uid, properties):
+ document = Document()
+ document.add_value(_VALUE_UID, uid)
+ term_generator = TermGenerator()
+ term_generator.index_document(document, properties)
+
+ if not self.contains(uid):
+ self._database.add_document(document)
+ else:
+            self._database.replace_document(
+                _PREFIX_FULL_VALUE + _PREFIX_UID + uid, document)
+
+ self._flush()
+
+ def find(self, query):
+ offset = query.pop('offset', 0)
+ limit = query.pop('limit', MAX_QUERY_LIMIT)
+ order_by = query.pop('order_by', [])
+ query_string = query.pop('query', None)
+
+ query_parser = QueryParser()
+ query_parser.set_database(self._database)
+ enquire = Enquire(self._database)
+ enquire.set_query(query_parser.parse_query(query, query_string))
+
+        # Ask Xapian to check at least offset + limit + 1 documents, so
+        # the match count is exact within the requested window.
+        check_at_least = offset + limit + 1
+
+ if not order_by:
+ order_by = '+timestamp'
+ else:
+ order_by = order_by[0]
+
+ if order_by == '+timestamp':
+ enquire.set_sort_by_value(_VALUE_TIMESTAMP, True)
+ elif order_by == '-timestamp':
+ enquire.set_sort_by_value(_VALUE_TIMESTAMP, False)
+ elif order_by == '+title':
+ enquire.set_sort_by_value(_VALUE_TITLE, True)
+ elif order_by == '-title':
+ enquire.set_sort_by_value(_VALUE_TITLE, False)
+ else:
+ logging.warning('Unsupported property for sorting: %s', order_by)
+
+ query_result = enquire.get_mset(offset, limit, check_at_least)
+ total_count = query_result.get_matches_estimated()
+
+ uids = []
+ for hit in query_result:
+ uids.append(hit.document.get_value(_VALUE_UID))
+
+ return (uids, total_count)
+
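+    # Hedged usage sketch for find(): paging and sorting keys travel in
+    # the same dictionary as the query terms (values illustrative only):
+    #
+    #   uids, count = index_store.find({'activity': 'org.laptop.WebActivity',
+    #                                   'order_by': ['-timestamp'],
+    #                                   'limit': 20})
+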
+ def delete(self, uid):
+ self._database.delete_document(_PREFIX_FULL_VALUE + _PREFIX_UID + uid)
+ self._flush()
+
+ def get_activities(self):
+ activities = []
+ prefix = _PREFIX_FULL_VALUE + _PREFIX_ACTIVITY
+ for term in self._database.allterms(prefix):
+ activities.append(term.term[len(prefix):])
+ return activities
+
+ def flush(self):
+ self._flush(True)
+
+ def get_index_updated(self):
+ return os.path.exists(self._index_updated_path)
+
+ index_updated = property(get_index_updated)
+
+ def _set_index_updated(self, index_updated):
+ if index_updated != self.index_updated:
+ if index_updated:
+ index_updated_file = open(self._index_updated_path, 'w')
+                # index_updated only becomes True at most once every
+                # _FLUSH_TIMEOUT seconds, so an fsync here is cheap enough
+ os.fsync(index_updated_file.fileno())
+ index_updated_file.close()
+ else:
+ os.remove(self._index_updated_path)
+
+ def _flush_timeout_cb(self):
+ self._flush(True)
+ return False
+
+ def _flush(self, force=False):
+ """Called after any database mutation"""
+ logging.debug('IndexStore.flush: force=%r _pending_writes=%r',
+ force, self._pending_writes)
+
+ self._set_index_updated(False)
+
+ if self._flush_timeout is not None:
+ gobject.source_remove(self._flush_timeout)
+ self._flush_timeout = None
+
+ self._pending_writes += 1
+ if force or self._pending_writes > _FLUSH_THRESHOLD:
+ self._database.flush()
+ self._pending_writes = 0
+ self._set_index_updated(True)
+ else:
+ self._flush_timeout = gobject.timeout_add_seconds(_FLUSH_TIMEOUT,
+ self._flush_timeout_cb)
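+
+# Sketch of the resulting flush behaviour with the defaults above
+# (20 pending writes / 5 seconds):
+#
+#   store() x3, then idle  -> timeout fires after 5s -> database.flush()
+#   21 store() calls fast  -> _pending_writes > _FLUSH_THRESHOLD
+#                          -> immediate database.flush()
+#
+# Either path resets _pending_writes to 0 and marks the index as updated.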
diff --git a/datastore/src/carquinyol/layoutmanager.py b/datastore/src/carquinyol/layoutmanager.py
new file mode 100644
index 0000000..5c67203
--- /dev/null
+++ b/datastore/src/carquinyol/layoutmanager.py
@@ -0,0 +1,112 @@
+# Copyright (C) 2008, One Laptop Per Child
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import os
+import logging
+
+MAX_QUERY_LIMIT = 40960
+CURRENT_LAYOUT_VERSION = 4
+
+
+class LayoutManager(object):
+    """Provide the logic for how entries are laid out inside the
+    datastore directory.
+    """
+
+ def __init__(self):
+ profile = os.environ.get('SUGAR_PROFILE', 'default')
+ base_dir = os.path.join(os.path.expanduser('~'), '.sugar', profile)
+
+ self._root_path = os.path.join(base_dir, 'datastore')
+
+ if not os.path.exists(self._root_path):
+ os.makedirs(self._root_path)
+
+ self._create_if_needed(self.get_checksums_dir())
+ self._create_if_needed(self.get_queue_path())
+
+ def _create_if_needed(self, path):
+ if not os.path.exists(path):
+ os.makedirs(path)
+
+ def get_version(self):
+ version_path = os.path.join(self._root_path, 'version')
+ version = 0
+ if os.path.exists(version_path):
+ try:
+ version = int(open(version_path, 'r').read())
+ except ValueError:
+ logging.exception('Can not read layout version')
+ version = 0
+
+ return version
+
+ def set_version(self, version):
+ version_path = os.path.join(self._root_path, 'version')
+ open(version_path, 'w').write(str(version))
+
+ def get_entry_path(self, uid):
+ # os.path.join() is just too slow
+ return '%s/%s/%s' % (self._root_path, uid[:2], uid)
+
+ def get_data_path(self, uid):
+ return '%s/%s/%s/data' % (self._root_path, uid[:2], uid)
+
+ def get_metadata_path(self, uid):
+ return '%s/%s/%s/metadata' % (self._root_path, uid[:2], uid)
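+
+    # Resulting on-disk layout (sketch for an illustrative uid):
+    #
+    #   uid      = '01234567-89ab-cdef-0123-456789abcdef'
+    #   entry    = <root>/01/01234567-89ab-cdef-0123-456789abcdef
+    #   data     = <root>/01/01234567-89ab-cdef-0123-456789abcdef/data
+    #   metadata = <root>/01/01234567-89ab-cdef-0123-456789abcdef/metadata
+    #
+    # The two-character prefix shards entries over subdirectories so no
+    # single directory has to hold every entry.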
+
+ def get_root_path(self):
+ return self._root_path
+
+ def get_index_path(self):
+ return os.path.join(self._root_path, 'index')
+
+ def get_checksums_dir(self):
+ return os.path.join(self._root_path, 'checksums')
+
+ def get_queue_path(self):
+ return os.path.join(self.get_checksums_dir(), 'queue')
+
+ def find_all(self):
+ uids = []
+ for f in os.listdir(self._root_path):
+ if os.path.isdir(os.path.join(self._root_path, f)) and len(f) == 2:
+ for g in os.listdir(os.path.join(self._root_path, f)):
+ if len(g) == 36:
+ uids.append(g)
+ return uids
+
+ def _is_empty(self):
+ """Check if there is any existing entry.
+
+ All data store layout versions are handled. Will err on the safe
+ side (i.e. return False if there might be any entry)."""
+ if os.path.exists(os.path.join(self._root_path, 'store')):
+ # unmigrated 0.82 data store
+ return False
+
+ for f in os.listdir(self._root_path):
+ if os.path.isdir(os.path.join(self._root_path, f)) and len(f) == 2:
+ for g in os.listdir(os.path.join(self._root_path, f)):
+ if len(g) == 36:
+ return False
+ return True
+
+_instance = None
+
+
+def get_instance():
+ global _instance
+ if _instance is None:
+ _instance = LayoutManager()
+ return _instance
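+
+# Usage sketch: callers share a single LayoutManager via get_instance(),
+# e.g. (uid illustrative):
+#
+#   from carquinyol import layoutmanager
+#   path = layoutmanager.get_instance().get_entry_path(uid)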
diff --git a/datastore/src/carquinyol/metadatareader.c b/datastore/src/carquinyol/metadatareader.c
new file mode 100644
index 0000000..454c8c3
--- /dev/null
+++ b/datastore/src/carquinyol/metadatareader.c
@@ -0,0 +1,233 @@
+#include "Python.h"
+
+#include <dirent.h>
+#include <sys/stat.h>   /* struct stat / stat(2) in add_property() */
+
+// TODO: put it in a place where python can use it when writing metadata
+#define MAX_PROPERTY_LENGTH (500 * 1024)
+
+static PyObject *byte_array_type = NULL;
+
+int
+add_property(const char *metadata_path, char *property_name, PyObject *dict,
+ int must_exist)
+{
+ int file_path_size;
+ char *file_path = NULL;
+ FILE *file = NULL;
+ long file_size;
+ char *value_buf = NULL;
+ PyObject *value = NULL;
+ struct stat file_stat;
+
+ // Build path of the property file
+ file_path_size = strlen(metadata_path) + 1 + strlen(property_name) + 1;
+ file_path = PyMem_Malloc(file_path_size);
+ if (file_path == NULL) {
+ PyErr_NoMemory();
+ return 0;
+ }
+ snprintf (file_path, file_path_size, "%s/%s", metadata_path, property_name);
+
+ if ((!must_exist) && (stat(file_path, &file_stat) != 0)) {
+ PyMem_Free(file_path);
+ return 1;
+ }
+
+ file = fopen(file_path, "r");
+ if (file == NULL) {
+ char buf[256];
+ snprintf(buf, sizeof(buf), "Cannot open property file %s: %s",
+ file_path, strerror(errno));
+ PyErr_SetString(PyExc_IOError, buf);
+ goto cleanup;
+ }
+
+ // Get file size
+ fseek (file, 0, SEEK_END);
+ file_size = ftell (file);
+ rewind (file);
+
+ if (file_size == 0) {
+ // Empty property
+ fclose(file);
+ file = NULL;
+
+ value = PyString_FromString("");
+ if (value == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "Failed to convert value to python string");
+ goto cleanup;
+ }
+ } else {
+ if (file_size > MAX_PROPERTY_LENGTH) {
+ PyErr_SetString(PyExc_ValueError, "Property file too big");
+ goto cleanup;
+ }
+
+ // Read the whole file
+ value_buf = PyMem_Malloc(file_size);
+ if (value_buf == NULL) {
+ PyErr_NoMemory();
+ goto cleanup;
+ }
+ long read_size = fread(value_buf, 1, file_size, file);
+ if (read_size < file_size) {
+ char buf[256];
+ snprintf(buf, sizeof(buf),
+ "Error while reading property file %s", file_path);
+ PyErr_SetString(PyExc_IOError, buf);
+ goto cleanup;
+ }
+
+ fclose(file);
+ file = NULL;
+
+        // Convert value to dbus.ByteArray ("s#" expects an int length)
+        PyObject *args = Py_BuildValue("(s#)", value_buf, (int) file_size);
+        if (args == NULL) {
+            goto cleanup;
+        }
+
+ PyMem_Free(value_buf);
+ value_buf = NULL;
+
+ value = PyObject_CallObject(byte_array_type, args);
+ Py_DECREF(args);
+
+ if (value == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "Failed to convert value to dbus.ByteArray");
+ goto cleanup;
+ }
+ }
+
+ // Add property to the metadata dict
+ if (PyDict_SetItemString(dict, property_name, value) == -1) {
+ PyErr_SetString(PyExc_ValueError,
+ "Failed to add property to dictionary");
+ goto cleanup;
+ }
+
+ Py_DECREF(value);
+ PyMem_Free(file_path);
+
+ return 1;
+
+cleanup:
+ if (file_path) {
+ PyMem_Free(file_path);
+ }
+ if (value_buf) {
+ PyMem_Free(value_buf);
+ }
+ if (file) {
+ fclose(file);
+ }
+ if (value) {
+ Py_DECREF(value);
+ }
+ return 0;
+}
+
+static PyObject *
+read_from_properties_list (const char *metadata_path, PyObject *properties)
+{
+ PyObject *dict = PyDict_New();
+
+ int i;
+ for (i = 0; i < PyList_Size(properties); i++) {
+ PyObject *property = PyList_GetItem(properties, i);
+ char *property_name = PyString_AsString (property);
+
+ if (add_property(metadata_path, property_name, dict, 0) == 0)
+ goto cleanup;
+ }
+
+ return dict;
+
+cleanup:
+ if (dict) {
+ Py_DECREF(dict);
+ }
+ return NULL;
+}
+
+static PyObject *
+read_all_properties (const char *metadata_path)
+{
+ PyObject *dict = PyDict_New();
+ DIR *dir_stream = NULL;
+ struct dirent *dir_entry = NULL;
+
+ dir_stream = opendir (metadata_path);
+ if (dir_stream == NULL) {
+ char buf[256];
+ snprintf(buf, sizeof(buf), "Couldn't open metadata directory %s",
+ metadata_path);
+ PyErr_SetString(PyExc_IOError, buf);
+ goto cleanup;
+ }
+
+ dir_entry = readdir(dir_stream);
+ while (dir_entry != NULL) {
+ // Skip . and ..
+ if (dir_entry->d_name[0] == '.' &&
+ (strlen(dir_entry->d_name) == 1 ||
+ (dir_entry->d_name[1] == '.' &&
+ strlen(dir_entry->d_name) == 2)))
+ goto next_property;
+
+ if (add_property(metadata_path, dir_entry->d_name, dict, 1) == 0)
+ goto cleanup;
+
+ next_property:
+ dir_entry = readdir(dir_stream);
+ }
+
+ closedir(dir_stream);
+
+ return dict;
+
+cleanup:
+ if (dict) {
+ Py_DECREF(dict);
+ }
+ if (dir_stream) {
+ closedir(dir_stream);
+ }
+ return NULL;
+}
+
+static PyObject *
+metadatareader_retrieve(PyObject *unused, PyObject *args)
+{
+ PyObject *dict = NULL;
+ PyObject *properties = NULL;
+ const char *metadata_path = NULL;
+
+ if (!PyArg_ParseTuple(args, "sO:retrieve", &metadata_path, &properties))
+ return NULL;
+
+ if ((properties != Py_None) && (PyList_Size(properties) > 0)) {
+ dict = read_from_properties_list(metadata_path, properties);
+ } else {
+ dict = read_all_properties(metadata_path);
+ }
+
+ return dict;
+}
+
+static PyMethodDef metadatareader_functions[] = {
+ {"retrieve", metadatareader_retrieve, METH_VARARGS,
+ PyDoc_STR("Read a dictionary from a directory with a single file " \
+ "(containing the content) per key")},
+ {NULL, NULL, 0, NULL}
+};
+
+PyMODINIT_FUNC
+initmetadatareader(void)
+{
+    PyObject *mod = Py_InitModule("metadatareader", metadatareader_functions);
+    if (mod == NULL)
+        return;
+
+    PyObject *dbus_module = PyImport_ImportModule("dbus");
+    if (dbus_module == NULL)
+        return;
+    byte_array_type = PyObject_GetAttrString(dbus_module, "ByteArray");
+    Py_DECREF(dbus_module);
+}
+
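+/*
+ * Usage sketch from Python (path and keys illustrative only):
+ *
+ *     import metadatareader
+ *     everything = metadatareader.retrieve('/path/to/entry/metadata', None)
+ *     some = metadatareader.retrieve('/path/to/entry/metadata',
+ *                                    ['title', 'mime_type'])
+ *
+ * Non-empty values come back as dbus.ByteArray, empty ones as ''.
+ */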
diff --git a/datastore/src/carquinyol/metadatastore.py b/datastore/src/carquinyol/metadatastore.py
new file mode 100644
index 0000000..5967017
--- /dev/null
+++ b/datastore/src/carquinyol/metadatastore.py
@@ -0,0 +1,60 @@
+import os
+
+from carquinyol import layoutmanager
+from carquinyol import metadatareader
+
+MAX_SIZE = 256
+_INTERNAL_KEYS = ['checksum']
+
+
+class MetadataStore(object):
+
+ def store(self, uid, metadata):
+ metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
+ if not os.path.exists(metadata_path):
+ os.makedirs(metadata_path)
+ else:
+ for key in os.listdir(metadata_path):
+ if key not in _INTERNAL_KEYS:
+ os.remove(os.path.join(metadata_path, key))
+
+ metadata['uid'] = uid
+ for key, value in metadata.items():
+
+            # Hack to support activities that still pass property names
+            # like title:text.
+ if ':' in key:
+ key = key.split(':', 1)[0]
+
+ f = open(os.path.join(metadata_path, key), 'w')
+ try:
+ if isinstance(value, unicode):
+ value = value.encode('utf-8')
+ elif not isinstance(value, basestring):
+ value = str(value)
+ f.write(value)
+ finally:
+ f.close()
+
+ def retrieve(self, uid, properties=None):
+ metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
+ return metadatareader.retrieve(metadata_path, properties)
+
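+    # Round-trip sketch (uid and values illustrative): store() writes one
+    # file per key under the metadata directory; retrieve() reads them
+    # back through the metadatareader extension:
+    #
+    #   store = MetadataStore()
+    #   store.store(uid, {'title': 'My notes', 'mime_type': 'text/plain'})
+    #   store.retrieve(uid, ['title'])
+    #   # -> {'title': dbus.ByteArray('My notes')}
+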
+ def delete(self, uid):
+ metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
+ for key in os.listdir(metadata_path):
+ os.remove(os.path.join(metadata_path, key))
+ os.rmdir(metadata_path)
+
+ def get_property(self, uid, key):
+ metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
+ property_path = os.path.join(metadata_path, key)
+ if os.path.exists(property_path):
+ return open(property_path, 'r').read()
+ else:
+ return None
+
+ def set_property(self, uid, key, value):
+ metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
+ property_path = os.path.join(metadata_path, key)
+ open(property_path, 'w').write(value)
diff --git a/datastore/src/carquinyol/migration.py b/datastore/src/carquinyol/migration.py
new file mode 100644
index 0000000..95ee391
--- /dev/null
+++ b/datastore/src/carquinyol/migration.py
@@ -0,0 +1,103 @@
+# Copyright (C) 2008, One Laptop Per Child
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+"""Transform one DataStore directory in a newer format.
+"""
+
+import os
+import logging
+import shutil
+import time
+
+import cjson
+
+from carquinyol import layoutmanager
+
+DATE_FORMAT = '%Y-%m-%dT%H:%M:%S'
+
+
+def migrate_from_0():
+ logging.info('Migrating datastore from version 0 to version 1')
+
+ root_path = layoutmanager.get_instance().get_root_path()
+ old_root_path = os.path.join(root_path, 'store')
+ if not os.path.exists(old_root_path):
+ return
+
+ for f in os.listdir(old_root_path):
+ uid, ext = os.path.splitext(f)
+ if ext != '.metadata':
+ continue
+
+ logging.debug('Migrating entry %r', uid)
+ try:
+ _migrate_metadata(root_path, old_root_path, uid)
+ _migrate_file(root_path, old_root_path, uid)
+ _migrate_preview(root_path, old_root_path, uid)
+ except Exception:
+ logging.exception('Error while migrating entry %r', uid)
+
+ # Just be paranoid, it's cheap.
+ if old_root_path.endswith('datastore/store'):
+ shutil.rmtree(old_root_path)
+
+ logging.info('Migration finished')
+
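+# Sketch of the transformation (uid illustrative): version 0 kept flat
+# files under datastore/store/, version 1 shards one directory per entry:
+#
+#   store/<uid>           -> <uid[:2]>/<uid>/data
+#   store/<uid>.metadata  -> <uid[:2]>/<uid>/metadata/<key> (file per key)
+#   store/preview/<uid>   -> <uid[:2]>/<uid>/metadata/preview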
+
+def _migrate_metadata(root_path, old_root_path, uid):
+ metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
+ os.makedirs(metadata_path)
+
+ old_metadata_path = os.path.join(old_root_path, uid + '.metadata')
+ metadata = cjson.decode(open(old_metadata_path, 'r').read())
+
+ if 'uid' not in metadata:
+ metadata['uid'] = uid
+
+ if 'timestamp' not in metadata and 'mtime' in metadata:
+ metadata['timestamp'] = \
+ time.mktime(time.strptime(metadata['mtime'], DATE_FORMAT))
+
+ for key, value in metadata.items():
+ try:
+ f = open(os.path.join(metadata_path, key), 'w')
+ try:
+ if isinstance(value, unicode):
+ value = value.encode('utf-8')
+ if not isinstance(value, basestring):
+ value = str(value)
+ f.write(value)
+ finally:
+ f.close()
+ except Exception:
+ logging.exception(
+ 'Error while migrating property %s of entry %s', key, uid)
+
+
+def _migrate_file(root_path, old_root_path, uid):
+ if os.path.exists(os.path.join(old_root_path, uid)):
+ new_data_path = layoutmanager.get_instance().get_data_path(uid)
+ os.rename(os.path.join(old_root_path, uid),
+ new_data_path)
+
+
+def _migrate_preview(root_path, old_root_path, uid):
+ metadata_path = layoutmanager.get_instance().get_metadata_path(uid)
+ os.rename(os.path.join(old_root_path, 'preview', uid),
+ os.path.join(metadata_path, 'preview'))
diff --git a/datastore/src/carquinyol/optimizer.py b/datastore/src/carquinyol/optimizer.py
new file mode 100644
index 0000000..2b6ce29
--- /dev/null
+++ b/datastore/src/carquinyol/optimizer.py
@@ -0,0 +1,167 @@
+# Copyright (C) 2008, One Laptop Per Child
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import os
+import errno
+import logging
+import subprocess
+
+import gobject
+
+from carquinyol import layoutmanager
+
+
+class Optimizer(object):
+ """Optimizes disk space usage by detecting duplicates and sharing storage.
+ """
+
+ def __init__(self, file_store, metadata_store):
+ self._file_store = file_store
+ self._metadata_store = metadata_store
+ self._enqueue_checksum_id = None
+
+ def optimize(self, uid):
+ """Add an entry to a queue of entries to be checked for duplicates.
+
+ """
+ if not os.path.exists(self._file_store.get_file_path(uid)):
+ return
+
+ queue_path = layoutmanager.get_instance().get_queue_path()
+ open(os.path.join(queue_path, uid), 'w').close()
+ logging.debug('optimize %r', os.path.join(queue_path, uid))
+
+ if self._enqueue_checksum_id is None:
+ self._enqueue_checksum_id = \
+ gobject.idle_add(self._process_entry_cb,
+ priority=gobject.PRIORITY_LOW)
+
+ def remove(self, uid):
+ """Remove any structures left from space optimization
+
+ """
+ checksum = self._metadata_store.get_property(uid, 'checksum')
+ if checksum is None:
+ return
+
+ checksums_dir = layoutmanager.get_instance().get_checksums_dir()
+ checksum_path = os.path.join(checksums_dir, checksum)
+ checksum_entry_path = os.path.join(checksum_path, uid)
+
+ if os.path.exists(checksum_entry_path):
+ logging.debug('remove %r', checksum_entry_path)
+ os.remove(checksum_entry_path)
+
+ if os.path.exists(checksum_path):
+ try:
+ os.rmdir(checksum_path)
+ logging.debug('removed %r', checksum_path)
+ except OSError, e:
+ if e.errno != errno.ENOTEMPTY:
+ raise
+
+ def _identical_file_already_exists(self, checksum):
+ """Check if we already have files with this checksum.
+
+ """
+ checksums_dir = layoutmanager.get_instance().get_checksums_dir()
+ checksum_path = os.path.join(checksums_dir, checksum)
+ return os.path.exists(checksum_path)
+
+ def _get_uid_from_checksum(self, checksum):
+ """Get an existing entry which file matches checksum.
+
+ """
+ checksums_dir = layoutmanager.get_instance().get_checksums_dir()
+ checksum_path = os.path.join(checksums_dir, checksum)
+ first_uid = os.listdir(checksum_path)[0]
+ return first_uid
+
+ def _create_checksum_dir(self, checksum):
+ """Create directory that tracks files with this same checksum.
+
+ """
+ checksums_dir = layoutmanager.get_instance().get_checksums_dir()
+ checksum_path = os.path.join(checksums_dir, checksum)
+ logging.debug('create dir %r', checksum_path)
+ os.mkdir(checksum_path)
+
+ def _add_checksum_entry(self, uid, checksum):
+ """Create a file in the checksum dir with the uid of the entry
+
+ """
+ checksums_dir = layoutmanager.get_instance().get_checksums_dir()
+ checksum_path = os.path.join(checksums_dir, checksum)
+
+ logging.debug('touch %r', os.path.join(checksum_path, uid))
+ open(os.path.join(checksum_path, uid), 'w').close()
+
+ def _already_linked(self, uid, checksum):
+ """Check if this entry's file is already a hard link to the checksums
+ dir.
+
+ """
+ checksums_dir = layoutmanager.get_instance().get_checksums_dir()
+ checksum_path = os.path.join(checksums_dir, checksum)
+ return os.path.exists(os.path.join(checksum_path, uid))
+
+ def _process_entry_cb(self):
+ """Process one item in the checksums queue by calculating its checksum,
+ checking if there exist already an identical file, and in that case
+ substituting its file with a hard link to that pre-existing file.
+
+ """
+ queue_path = layoutmanager.get_instance().get_queue_path()
+ queue = os.listdir(queue_path)
+ if queue:
+ uid = queue[0]
+ logging.debug('_process_entry_cb processing %r', uid)
+
+ file_in_entry_path = self._file_store.get_file_path(uid)
+ if not os.path.exists(file_in_entry_path):
+ logging.info('non-existent entry in queue: %r', uid)
+ else:
+ checksum = self._calculate_md5sum(file_in_entry_path)
+ self._metadata_store.set_property(uid, 'checksum', checksum)
+
+ if self._identical_file_already_exists(checksum):
+ if not self._already_linked(uid, checksum):
+ existing_entry_uid = \
+ self._get_uid_from_checksum(checksum)
+
+ self._file_store.hard_link_entry(uid,
+ existing_entry_uid)
+
+ self._add_checksum_entry(uid, checksum)
+ else:
+ self._create_checksum_dir(checksum)
+ self._add_checksum_entry(uid, checksum)
+
+ os.remove(os.path.join(queue_path, uid))
+
+ if len(queue) <= 1:
+ self._enqueue_checksum_id = None
+ return False
+ else:
+ return True
+
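+    # Net effect sketch (checksum and uids illustrative): once two entries
+    # with identical content have passed through the queue:
+    #
+    #   checksums/<md5>/<uid-a>   empty marker file
+    #   checksums/<md5>/<uid-b>   empty marker file
+    #
+    # and both entries' data files share one inode via the file store's
+    # hard_link_entry() (defined in filestore.py, not shown here).
+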
+ def _calculate_md5sum(self, path):
+ """Calculate the md5 checksum of a given file.
+
+ """
+ popen = subprocess.Popen(['md5sum', path], stdout=subprocess.PIPE)
+ stdout, stderr_ = popen.communicate()
+ return stdout.split(' ', 1)[0]
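+
+    # Alternative sketch (not what this module does; shown for comparison
+    # only): the same digest could be computed in-process with hashlib
+    # instead of spawning md5sum:
+    #
+    #   import hashlib
+    #
+    #   def _calculate_md5sum(self, path):
+    #       md5 = hashlib.md5()
+    #       f = open(path, 'rb')
+    #       try:
+    #           for chunk in iter(lambda: f.read(65536), ''):
+    #               md5.update(chunk)
+    #       finally:
+    #           f.close()
+    #       return md5.hexdigest()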