Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/djvu
diff options
context:
space:
mode:
authorNickolay V. Shmyrev <nshmyrev@src.gnome.org>2006-02-19 20:22:57 (GMT)
committer Nickolay V. Shmyrev <nshmyrev@src.gnome.org>2006-02-19 20:22:57 (GMT)
commit2fcdb759f53fee8ccca7eaa3d67743cc8d42b0fc (patch)
tree71391a6714d3dc4f23af219787bf8421804e0374 /djvu
parent23bdde922d86e95c6948bd2106277d019cbda1f3 (diff)
Text search in djvu. Fix for the bug #303415.
* configure.ac: * djvu/Makefile.am: * djvu/djvu-document-private.h: * djvu/djvu-document.c: (G_DEFINE_TYPE_WITH_CODE), (djvu_handle_events), (djvu_document_load), (djvu_document_save), (djvu_document_get_page_size), (djvu_document_render_pixbuf), (djvu_document_can_get_text), (djvu_document_get_text), (djvu_document_document_iface_init), (djvu_document_thumbnails_get_thumbnail), (djvu_document_find_begin), (djvu_document_find_get_n_results), (djvu_document_find_get_result), (djvu_document_find_page_has_results), (djvu_document_find_get_progress), (djvu_document_find_cancel), (djvu_document_find_iface_init): * djvu/djvu-text-page.c: (djvu_text_page_selection_process), (djvu_text_page_selection), (djvu_text_page_limits_process), (djvu_text_page_limits), (djvu_text_page_copy), (djvu_text_page_position), (djvu_text_page_union), (djvu_text_page_sexpr_process), (djvu_text_page_sexpr), (djvu_text_page_box), (djvu_text_page_append_text), (djvu_text_page_search), (djvu_text_page_prepare_search), (djvu_text_page_new), (djvu_text_page_free): * djvu/djvu-text-page.h: * djvu/djvu-text.c: (djvu_text_idle_callback), (djvu_text_new), (djvu_text_copy), (djvu_text_free), (djvu_text_get_text), (djvu_text_n_results), (djvu_text_has_results), (djvu_text_get_result), (djvu_text_get_progress): * djvu/djvu-text.h: Text search in djvu. Fix for the bug #303415.
Diffstat (limited to 'djvu')
-rw-r--r--djvu/Makefile.am7
-rw-r--r--djvu/djvu-document-private.h45
-rw-r--r--djvu/djvu-document.c213
-rw-r--r--djvu/djvu-text-page.c444
-rw-r--r--djvu/djvu-text-page.h59
-rw-r--r--djvu/djvu-text.c298
-rw-r--r--djvu/djvu-text.h50
7 files changed, 1079 insertions, 37 deletions
diff --git a/djvu/Makefile.am b/djvu/Makefile.am
index 7afa523..478943f 100644
--- a/djvu/Makefile.am
+++ b/djvu/Makefile.am
@@ -9,7 +9,12 @@ noinst_LTLIBRARIES = libgtkdjvu.la
libgtkdjvu_la_SOURCES = \
djvu-document.c \
- djvu-document.h
+ djvu-document.h \
+ djvu-document-private.h \
+ djvu-text.c \
+ djvu-text.h \
+ djvu-text-page.c \
+ djvu-text-page.h
libgtkdjvu_la_LIBADD = -lpthread -ldjvulibre
diff --git a/djvu/djvu-document-private.h b/djvu/djvu-document-private.h
new file mode 100644
index 0000000..3fa579f
--- /dev/null
+++ b/djvu/djvu-document-private.h
@@ -0,0 +1,45 @@
+/*
+ * Declarations used throughout the djvu classes
+ *
+ * Copyright (C) 2006, Michael Hofmann <mh21@piware.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __DJVU_DOCUMENT_INTERNAL_H__
+#define __DJVU_DOCUMENT_INTERNAL_H__
+
+#include "djvu-document.h"
+#include "djvu-text.h"
+
+#include <libdjvu/ddjvuapi.h>
+
+/* Instance data of the DjVu backend, shared between the djvu sources. */
+struct _DjvuDocument {
+ GObject parent_instance;
+
+ /* DjvuLibre handles; d_context is the one polled by
+ * djvu_handle_events(). */
+ ddjvu_context_t *d_context;
+ ddjvu_document_t *d_document;
+ ddjvu_format_t *d_format;
+
+ gchar *uri;
+
+ /* Current find operation, or NULL when no search is active. */
+ DjvuText *search;
+};
+
+/* Used by the text search code as the number of pages in @document. */
+int djvu_document_get_n_pages (EvDocument *document);
+/* Drains the DjvuLibre message queue of @djvu_document; a non-zero
+ * @wait blocks for a message first. */
+void djvu_handle_events (DjvuDocument *djvu_document,
+ int wait);
+
+#endif /* __DJVU_DOCUMENT_INTERNAL_H__ */
diff --git a/djvu/djvu-document.c b/djvu/djvu-document.c
index 135d81c..e50fb8f 100644
--- a/djvu/djvu-document.c
+++ b/djvu/djvu-document.c
@@ -18,12 +18,17 @@
*/
#include "djvu-document.h"
+#include "djvu-text.h"
+#include "djvu-document-private.h"
#include "ev-document-thumbnails.h"
#include "ev-document-misc.h"
+#include "ev-document-find.h"
#include <libdjvu/ddjvuapi.h>
#include <gtk/gtk.h>
#include <gdk-pixbuf/gdk-pixbuf-core.h>
+#include <glib/gunicode.h>
+#include <string.h>
#define SCALE_FACTOR 0.2
@@ -37,33 +42,50 @@ struct _DjvuDocumentClass
GObjectClass parent_class;
};
-struct _DjvuDocument
-{
- GObject parent_instance;
-
- ddjvu_context_t *d_context;
- ddjvu_document_t *d_document;
- ddjvu_format_t *d_format;
-
- gchar *uri;
-};
-
typedef struct _DjvuDocumentClass DjvuDocumentClass;
static void djvu_document_document_iface_init (EvDocumentIface *iface);
static void djvu_document_document_thumbnails_iface_init (EvDocumentThumbnailsIface *iface);
+static void djvu_document_find_iface_init (EvDocumentFindIface *iface);
G_DEFINE_TYPE_WITH_CODE
(DjvuDocument, djvu_document, G_TYPE_OBJECT,
{
G_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT, djvu_document_document_iface_init);
G_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_THUMBNAILS, djvu_document_document_thumbnails_iface_init)
+ G_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_FIND, djvu_document_find_iface_init);
});
+
+/**
+ * djvu_handle_events:
+ * @djvu_document: document whose DjvuLibre context is polled
+ * @wait: if non-zero, block in ddjvu_message_wait() before draining
+ *
+ * Pops every message currently queued on the context of the document.
+ * DDJVU_ERROR messages are reported through g_warning(); all other
+ * messages are discarded. Does nothing if the document has no context.
+ */
+void
+djvu_handle_events (DjvuDocument *djvu_document, int wait)
+{
+	ddjvu_context_t *context = djvu_document->d_context;
+	const ddjvu_message_t *message;
+
+	if (context == NULL)
+		return;
+
+	if (wait)
+		ddjvu_message_wait (context);
+
+	for (message = ddjvu_message_peek (context);
+	     message != NULL;
+	     message = ddjvu_message_peek (context)) {
+		if (message->m_any.tag == DDJVU_ERROR) {
+			g_warning ("DjvuLibre error: %s",
+				   message->m_error.message);
+			if (message->m_error.filename)
+				g_warning ("DjvuLibre error: %s:%d",
+					   message->m_error.filename,
+					   message->m_error.lineno);
+		}
+		ddjvu_message_pop (context);
+	}
+}
+
static gboolean
djvu_document_load (EvDocument *document,
- const char *uri,
- GError **error)
+ const char *uri,
+ GError **error)
{
DjvuDocument *djvu_document = DJVU_DOCUMENT (document);
ddjvu_document_t *doc;
@@ -83,10 +105,8 @@ djvu_document_load (EvDocument *document,
djvu_document->d_document = doc;
- while (!ddjvu_document_decoding_done (djvu_document->d_document)) {
- ddjvu_message_wait (djvu_document->d_context);
- ddjvu_message_pop (djvu_document->d_context);
- }
+ while (!ddjvu_document_decoding_done (djvu_document->d_document))
+ djvu_handle_events(djvu_document, TRUE);
g_free (djvu_document->uri);
djvu_document->uri = g_strdup (uri);
@@ -96,15 +116,15 @@ djvu_document_load (EvDocument *document,
static gboolean
djvu_document_save (EvDocument *document,
- const char *uri,
- GError **error)
+ const char *uri,
+ GError **error)
{
DjvuDocument *djvu_document = DJVU_DOCUMENT (document);
return ev_xfer_uri_simple (djvu_document->uri, uri, error);
}
-static int
+int
djvu_document_get_n_pages (EvDocument *document)
{
DjvuDocument *djvu_document = DJVU_DOCUMENT (document);
@@ -116,19 +136,17 @@ djvu_document_get_n_pages (EvDocument *document)
static void
djvu_document_get_page_size (EvDocument *document,
- int page,
- double *width,
- double *height)
+ int page,
+ double *width,
+ double *height)
{
DjvuDocument *djvu_document = DJVU_DOCUMENT (document);
ddjvu_pageinfo_t info;
g_return_if_fail (djvu_document->d_document);
- while (ddjvu_document_get_pageinfo(djvu_document->d_document, page, &info) < DDJVU_JOB_OK) {
- ddjvu_message_wait (djvu_document->d_context);
- ddjvu_message_pop (djvu_document->d_context);
- }
+ while (ddjvu_document_get_pageinfo(djvu_document->d_document, page, &info) < DDJVU_JOB_OK)
+ djvu_handle_events(djvu_document, TRUE);
*width = info.width * SCALE_FACTOR;
*height = info.height * SCALE_FACTOR;
@@ -150,10 +168,8 @@ djvu_document_render_pixbuf (EvDocument *document,
d_page = ddjvu_page_create_by_pageno (djvu_document->d_document, rc->page);
- while (!ddjvu_page_decoding_done (d_page)) {
- ddjvu_message_wait (djvu_document->d_context);
- ddjvu_message_pop (djvu_document->d_context);
- }
+ while (!ddjvu_page_decoding_done (d_page))
+ djvu_handle_events(djvu_document, TRUE);
page_width = ddjvu_page_get_width (d_page) * rc->scale * SCALE_FACTOR;
page_height = ddjvu_page_get_height (d_page) * rc->scale * SCALE_FACTOR;
@@ -203,7 +219,30 @@ djvu_document_class_init (DjvuDocumentClass *klass)
static gboolean
djvu_document_can_get_text (EvDocument *document)
{
- return FALSE;
+ return TRUE;
+}
+
+
+/*
+ * Returns the text of @page inside @rect as a newly allocated string;
+ * an empty string is returned when djvu_text_copy() yields NULL.
+ */
+static char *
+djvu_document_get_text (EvDocument *document, int page, EvRectangle *rect)
+{
+	double page_width, page_height;
+	EvRectangle djvu_rect;
+	char *selected;
+
+	djvu_document_get_page_size (document, page, &page_width, &page_height);
+
+	/* Undo SCALE_FACTOR and mirror the rectangle vertically about the
+	 * page height before handing it to the text code. */
+	djvu_rect.x1 = rect->x1 / SCALE_FACTOR;
+	djvu_rect.x2 = rect->x2 / SCALE_FACTOR;
+	djvu_rect.y1 = (page_height - rect->y2) / SCALE_FACTOR;
+	djvu_rect.y2 = (page_height - rect->y1) / SCALE_FACTOR;
+
+	selected = djvu_text_copy (DJVU_DOCUMENT (document), page, &djvu_rect);
+
+	return selected != NULL ? selected : g_strdup ("");
}
static EvDocumentInfo *
@@ -222,6 +261,7 @@ djvu_document_document_iface_init (EvDocumentIface *iface)
iface->load = djvu_document_load;
iface->save = djvu_document_save;
iface->can_get_text = djvu_document_can_get_text;
+ iface->get_text = djvu_document_get_text;
iface->get_n_pages = djvu_document_get_n_pages;
iface->get_page_size = djvu_document_get_page_size;
iface->render_pixbuf = djvu_document_render_pixbuf;
@@ -270,10 +310,8 @@ djvu_document_thumbnails_get_thumbnail (EvDocumentThumbnails *document,
gdk_pixbuf_fill (pixbuf, 0xffffffff);
pixels = gdk_pixbuf_get_pixels (pixbuf);
- while (ddjvu_thumbnail_status (djvu_document->d_document, page, 1) < DDJVU_JOB_OK) {
- ddjvu_message_wait (djvu_document->d_context);
- ddjvu_message_pop (djvu_document->d_context);
- }
+ while (ddjvu_thumbnail_status (djvu_document->d_document, page, 1) < DDJVU_JOB_OK)
+ djvu_handle_events(djvu_document, TRUE);
ddjvu_thumbnail_render (djvu_document->d_document, page,
&thumb_width, &thumb_height,
@@ -310,3 +348,106 @@ djvu_document_init (DjvuDocument *djvu_document)
djvu_document->d_document = NULL;
}
+/*
+ * Starts a search for @search_string beginning at @page. A running search
+ * for the same string is kept; any other running search is replaced.
+ *
+ * NOTE(review): the reuse test compares against the text stored by
+ * djvu_text_new(), which is case-folded for case-insensitive searches,
+ * and it does not look at @case_sensitive - confirm this is intended.
+ */
+static void
+djvu_document_find_begin (EvDocumentFind *document,
+			  int             page,
+			  const char     *search_string,
+			  gboolean        case_sensitive)
+{
+	DjvuDocument *djvu_document = DJVU_DOCUMENT (document);
+	DjvuText *current = djvu_document->search;
+
+	if (current != NULL) {
+		if (strcmp (search_string, djvu_text_get_text (current)) == 0)
+			return;
+		djvu_text_free (current);
+	}
+
+	djvu_document->search = djvu_text_new (djvu_document, page,
+					       case_sensitive, search_string);
+}
+
+/* Number of hits recorded for @page, or 0 when no search is active. */
+static int
+djvu_document_find_get_n_results (EvDocumentFind *document_find, int page)
+{
+	DjvuText *active = DJVU_DOCUMENT (document_find)->search;
+
+	return active != NULL ? djvu_text_n_results (active, page) : 0;
+}
+
+/*
+ * Stores hit number @n_result of @page in @rectangle, scaled by
+ * SCALE_FACTOR and mirrored vertically about the page height.
+ * Returns FALSE when there is no search or no such hit.
+ */
+static gboolean
+djvu_document_find_get_result (EvDocumentFind *document_find,
+			       int             page,
+			       int             n_result,
+			       EvRectangle    *rectangle)
+{
+	DjvuDocument *djvu_document = DJVU_DOCUMENT (document_find);
+	EvRectangle *hit = NULL;
+	double page_width, page_height;
+
+	if (djvu_document->search != NULL)
+		hit = djvu_text_get_result (djvu_document->search,
+					    page, n_result);
+	if (hit == NULL)
+		return FALSE;
+
+	djvu_document_get_page_size (EV_DOCUMENT (djvu_document),
+				     page, &page_width, &page_height);
+
+	rectangle->x1 = hit->x1 * SCALE_FACTOR;
+	rectangle->x2 = hit->x2 * SCALE_FACTOR;
+	rectangle->y1 = page_height - hit->y2 * SCALE_FACTOR;
+	rectangle->y2 = page_height - hit->y1 * SCALE_FACTOR;
+
+	return TRUE;
+}
+
+/* Returns 1 if a search is active and has hits on @page, otherwise 0. */
+static int
+djvu_document_find_page_has_results (EvDocumentFind *document_find,
+				     int             page)
+{
+	DjvuText *active = DJVU_DOCUMENT (document_find)->search;
+
+	if (active == NULL)
+		return 0;
+
+	return djvu_text_has_results (active, page) != 0;
+}
+
+/* Progress of the active search, or 0 when no search is active. */
+static double
+djvu_document_find_get_progress (EvDocumentFind *document_find)
+{
+	DjvuText *active = DJVU_DOCUMENT (document_find)->search;
+
+	return active != NULL ? djvu_text_get_progress (active) : 0;
+}
+
+/* Drops the active search, if any, and clears the pointer to it. */
+static void
+djvu_document_find_cancel (EvDocumentFind *document)
+{
+	DjvuDocument *djvu_document = DJVU_DOCUMENT (document);
+
+	if (djvu_document->search == NULL)
+		return;
+
+	djvu_text_free (djvu_document->search);
+	djvu_document->search = NULL;
+}
+
+/* Fills in the EvDocumentFind vtable with the DjVu implementations. */
+static void
+djvu_document_find_iface_init (EvDocumentFindIface *iface)
+{
+	/* search lifecycle */
+	iface->begin            = djvu_document_find_begin;
+	iface->cancel           = djvu_document_find_cancel;
+	iface->get_progress     = djvu_document_find_get_progress;
+
+	/* result queries */
+	iface->page_has_results = djvu_document_find_page_has_results;
+	iface->get_n_results    = djvu_document_find_get_n_results;
+	iface->get_result       = djvu_document_find_get_result;
+}
diff --git a/djvu/djvu-text-page.c b/djvu/djvu-text-page.c
new file mode 100644
index 0000000..c19d6f6
--- /dev/null
+++ b/djvu/djvu-text-page.c
@@ -0,0 +1,444 @@
+/*
+ * Implements search and copy functionality for Djvu files.
+ * Copyright (C) 2006 Michael Hofmann <mh21@piware.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include "djvu-text-page.h"
+
+#include <libdjvu/miniexp.h>
+
+/**
+ * djvu_text_page_selection_process:
+ * @page: #DjvuTextPage instance
+ * @p: s-expression whose string (element 5) is appended
+ * @delimit: bit 1 requests a newline separator, bit 0 a space
+ *
+ * Appends the string of @p to the page text once the start
+ * s-expression of the selection has been reached.
+ *
+ * Returns: FALSE once @p is the end of the selection, TRUE otherwise
+ */
+static gboolean
+djvu_text_page_selection_process (DjvuTextPage *page,
+				  miniexp_t     p,
+				  int           delimit)
+{
+	const char *token;
+	const char *separator;
+	char *joined;
+
+	/* Nothing is collected before the start s-expression is seen. */
+	if (page->text == NULL && p != page->start)
+		return TRUE;
+
+	token = miniexp_to_str (miniexp_nth (5, p));
+
+	if (page->text == NULL) {
+		page->text = g_strdup (token);
+	} else {
+		if (delimit & 2)
+			separator = "\n";
+		else if (delimit & 1)
+			separator = " ";
+		else
+			separator = NULL;
+
+		joined = g_strjoin (separator, page->text, token, NULL);
+		g_free (page->text);
+		page->text = joined;
+	}
+
+	return p != page->end;
+}
+
+/**
+ * djvu_text_page_selection:
+ * @page: #DjvuTextPage instance
+ * @p: tree to walk
+ * @delimit: separator flags inherited from the enclosing nodes
+ *
+ * Walks the tree in @p depth first and hands every node that carries a
+ * string to djvu_text_page_selection_process(). A non-"char" node adds
+ * a separator flag (space for "word", newline for anything else) that
+ * applies to its first child only.
+ *
+ * Returns: whether the end was not reached in this subtree
+ */
+static gboolean
+djvu_text_page_selection (DjvuTextPage *page,
+			  miniexp_t     p,
+			  int           delimit)
+{
+	miniexp_t child;
+
+	g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
+			      (miniexp_car (p)), FALSE);
+
+	if (miniexp_car (p) != page->char_symbol) {
+		if (miniexp_car (p) == page->word_symbol)
+			delimit |= 1;
+		else
+			delimit |= 2;
+	}
+
+	for (child = miniexp_cddr (miniexp_cdddr (p));
+	     child != miniexp_nil;
+	     child = miniexp_cdr (child)) {
+		miniexp_t item = miniexp_car (child);
+		gboolean more;
+
+		if (miniexp_stringp (item))
+			more = djvu_text_page_selection_process (page, p,
+								 delimit);
+		else
+			more = djvu_text_page_selection (page, item, delimit);
+
+		if (!more)
+			return FALSE;
+
+		delimit = 0;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Reads the box of @p (elements 1-4) and, if it overlaps @rect, records
+ * @p as the selection end and - when none is set yet - as its start.
+ */
+static void
+djvu_text_page_limits_process (DjvuTextPage *page,
+			       miniexp_t     p,
+			       EvRectangle  *rect)
+{
+	EvRectangle box;
+	gboolean overlaps;
+
+	box.x1 = miniexp_to_int (miniexp_nth (1, p));
+	box.y1 = miniexp_to_int (miniexp_nth (2, p));
+	box.x2 = miniexp_to_int (miniexp_nth (3, p));
+	box.y2 = miniexp_to_int (miniexp_nth (4, p));
+
+	overlaps = box.x2 >= rect->x1 && box.y1 <= rect->y2 &&
+		   box.x1 <= rect->x2 && box.y2 >= rect->y1;
+	if (!overlaps)
+		return;
+
+	if (page->start == miniexp_nil)
+		page->start = p;
+	page->end = p;
+}
+
+
+/**
+ * djvu_text_page_limits:
+ * @page: #DjvuTextPage instance
+ * @p: tree to walk
+ * @rect: selection rectangle
+ *
+ * Walks the tree in @p and calls djvu_text_page_limits_process() for
+ * every node that carries a string, so that the start and end fields of
+ * @page end up on the first and last such node overlapping @rect.
+ */
+static void
+djvu_text_page_limits (DjvuTextPage *page,
+		       miniexp_t     p,
+		       EvRectangle  *rect)
+{
+	miniexp_t deeper;
+
+	g_return_if_fail (miniexp_consp (p) &&
+			  miniexp_symbolp (miniexp_car (p)));
+
+	/* Skip the symbol and the four coordinates. */
+	deeper = miniexp_cddr (miniexp_cdddr (p));
+	while (deeper != miniexp_nil) {
+		miniexp_t str = miniexp_car (deeper);
+
+		if (miniexp_stringp (str))
+			djvu_text_page_limits_process (page, p, rect);
+		else
+			djvu_text_page_limits (page, str, rect);
+
+		deeper = miniexp_cdr (deeper);
+	}
+}
+
+/**
+ * djvu_text_page_copy:
+ * @page: #DjvuTextPage instance
+ * @rectangle: area to copy
+ *
+ * Determines the first and last s-expression overlapping @rectangle and
+ * collects the text between them.
+ *
+ * Returns: the collected text (ownership passes to the caller), or NULL
+ * if nothing was collected
+ */
+char *
+djvu_text_page_copy (DjvuTextPage *page,
+		     EvRectangle  *rectangle)
+{
+	char *selected;
+
+	page->start = page->end = miniexp_nil;
+
+	djvu_text_page_limits (page, page->text_structure, rectangle);
+	djvu_text_page_selection (page, page->text_structure, 0);
+
+	/* Detach the string from the page so that it is not freed with it. */
+	selected = page->text;
+	page->text = NULL;
+
+	return selected;
+}
+
+/**
+ * djvu_text_page_position:
+ * @page: #DjvuTextPage instance
+ * @position: index in the page text
+ *
+ * Binary search over the links array for the last link whose position
+ * is not greater than @position. A @position in front of the first link
+ * yields the first link.
+ *
+ * Returns: s-expression of that link, or miniexp_nil if there are no links
+ */
+static miniexp_t
+djvu_text_page_position (DjvuTextPage *page,
+			 int           position)
+{
+	GArray *links = page->links;
+	int low = 0;
+	int hi = links->len - 1;
+	int mid = 0;
+
+	g_return_val_if_fail (hi >= 0, miniexp_nil);
+
+	/* Shamelessly copied from GNU classpath */
+	while (low <= hi) {
+		DjvuTextLink *link;
+
+		mid = (low + hi) >> 1;
+		link = &g_array_index (links, DjvuTextLink, mid);
+		if (link->position == position)
+			break;
+		else if (link->position > position)
+			hi = --mid;	/* step back to the previous link */
+		else
+			low = mid + 1;
+	}
+
+	/* Stepping back from the first link leaves mid at -1. */
+	if (mid < 0)
+		mid = 0;
+
+	return g_array_index (links, DjvuTextLink, mid).pair;
+}
+
+/**
+ * djvu_text_page_union:
+ * @target: first rectangle and result
+ * @source: second rectangle
+ *
+ * Grows @target so that it becomes the bounding box of both rectangles.
+ */
+static void
+djvu_text_page_union (EvRectangle *target,
+		      EvRectangle *source)
+{
+	target->x1 = MIN (source->x1, target->x1);
+	target->y1 = MIN (source->y1, target->y1);
+	target->x2 = MAX (source->x2, target->x2);
+	target->y2 = MAX (source->y2, target->y2);
+}
+
+/**
+ * djvu_text_page_sexpr_process:
+ * @page: #DjvuTextPage instance
+ * @p: s-expression to add
+ * @start: first s-expression in the selection
+ * @end: last s-expression in the selection
+ *
+ * Merges the box of @p (elements 1-4) into the bounding box of @page
+ * once @start has been reached; the bounding box is allocated on the
+ * first merge.
+ *
+ * Returns: FALSE once @p is @end, TRUE otherwise
+ */
+static gboolean
+djvu_text_page_sexpr_process (DjvuTextPage *page,
+			      miniexp_t     p,
+			      miniexp_t     start,
+			      miniexp_t     end)
+{
+	EvRectangle token_box;
+
+	if (page->bounding_box == NULL && p != start)
+		return TRUE;
+
+	token_box.x1 = miniexp_to_int (miniexp_nth (1, p));
+	token_box.y1 = miniexp_to_int (miniexp_nth (2, p));
+	token_box.x2 = miniexp_to_int (miniexp_nth (3, p));
+	token_box.y2 = miniexp_to_int (miniexp_nth (4, p));
+
+	if (page->bounding_box == NULL) {
+		page->bounding_box = g_new (EvRectangle, 1);
+		page->bounding_box->x1 = token_box.x1;
+		page->bounding_box->y1 = token_box.y1;
+		page->bounding_box->x2 = token_box.x2;
+		page->bounding_box->y2 = token_box.y2;
+	} else {
+		djvu_text_page_union (page->bounding_box, &token_box);
+	}
+
+	return p != end;
+}
+
+/**
+ * djvu_text_page_sexpr:
+ * @page: #DjvuTextPage instance
+ * @p: tree to walk
+ * @start: first s-expression in the selection
+ * @end: last s-expression in the selection
+ *
+ * Walks the tree in @p depth first and extends the bounding box with
+ * djvu_text_page_sexpr_process() for every node that carries a string.
+ *
+ * Returns: whether the end was not reached in this subtree
+ */
+static gboolean
+djvu_text_page_sexpr (DjvuTextPage *page,
+		      miniexp_t     p,
+		      miniexp_t     start,
+		      miniexp_t     end)
+{
+	miniexp_t child;
+
+	g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp
+			      (miniexp_car (p)), FALSE);
+
+	for (child = miniexp_cddr (miniexp_cdddr (p));
+	     child != miniexp_nil;
+	     child = miniexp_cdr (child)) {
+		miniexp_t item = miniexp_car (child);
+		gboolean more;
+
+		if (miniexp_stringp (item))
+			more = djvu_text_page_sexpr_process (page, p,
+							     start, end);
+		else
+			more = djvu_text_page_sexpr (page, item, start, end);
+
+		if (!more)
+			return FALSE;
+	}
+
+	return TRUE;
+}
+
+/**
+ * djvu_text_page_box:
+ * @page: #DjvuTextPage instance
+ * @start: first s-expression in the selection
+ * @end: last s-expression in the selection
+ *
+ * Resets the bounding box of @page and rebuilds it from all
+ * s-expressions between @start and @end.
+ *
+ * Returns: newly allocated rectangle, or NULL if @start was never reached
+ */
+static EvRectangle *
+djvu_text_page_box (DjvuTextPage *page,
+		    miniexp_t     start,
+		    miniexp_t     end)
+{
+	miniexp_t root = page->text_structure;
+
+	page->bounding_box = NULL;
+	djvu_text_page_sexpr (page, root, start, end);
+
+	return page->bounding_box;
+}
+
+/**
+ * djvu_text_page_append_text:
+ * @page: #DjvuTextPage instance
+ * @p: tree to append
+ * @case_sensitive: do not ignore case
+ * @delimit: insert spaces because of higher (sentence/paragraph/...) break
+ *
+ * Appends the strings found in the tree @p to the page text and adds
+ * one #DjvuTextLink per string to the links array.
+ */
+static void
+djvu_text_page_append_text (DjvuTextPage *page,
+ miniexp_t p,
+ gboolean case_sensitive,
+ gboolean delimit)
+{
+ char *token_text;
+
+ g_return_if_fail (miniexp_consp (p) &&
+ miniexp_symbolp (miniexp_car (p)));
+
+ /* Any node that is not a "char" node requests a separator. */
+ delimit |= page->char_symbol != miniexp_car (p);
+
+ miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p));
+ while (deeper != miniexp_nil) {
+ miniexp_t data = miniexp_car (deeper);
+ if (miniexp_stringp (data)) {
+ DjvuTextLink link;
+ /* The offset is the text length before this token (and
+ * before any separator) is appended. */
+ link.position = page->text == NULL ? 0 :
+ strlen (page->text);
+ link.pair = p;
+ g_array_append_val (page->links, link);
+
+ token_text = (char *) miniexp_to_str (data);
+ /* The case-folded copy is released again below. */
+ if (!case_sensitive)
+ token_text = g_utf8_casefold (token_text, -1);
+ if (page->text == NULL)
+ page->text = g_strdup (token_text);
+ else {
+ char *new_text =
+ g_strjoin (delimit ? " " : NULL,
+ page->text, token_text,
+ NULL);
+ g_free (page->text);
+ page->text = new_text;
+ }
+ if (!case_sensitive)
+ g_free (token_text);
+ } else
+ djvu_text_page_append_text (page, data,
+ case_sensitive, delimit);
+ /* The separator applies to the first child only. */
+ delimit = FALSE;
+ deeper = miniexp_cdr (deeper);
+ }
+}
+
+/**
+ * djvu_text_page_search:
+ * @page: #DjvuTextPage instance, prepared with
+ * djvu_text_page_prepare_search()
+ * @text: text to search
+ *
+ * Searches the page text for all non-overlapping occurrences of @text
+ * and stores one bounding rectangle per occurrence in the results list
+ * of @page, in text order. The results list has to be externally freed
+ * afterwards. A NULL or empty @text, or a page without text, yields no
+ * results.
+ */
+void
+djvu_text_page_search (DjvuTextPage *page,
+		       char         *text)
+{
+	char *haystack = page->text;
+	int search_len;
+	EvRectangle *result;
+
+	if (page->links->len == 0 || haystack == NULL || text == NULL)
+		return;
+
+	/* An empty needle matches everywhere without advancing the
+	 * haystack, so the loop below would never terminate. */
+	search_len = strlen (text);
+	if (search_len == 0)
+		return;
+
+	while ((haystack = strstr (haystack, text)) != NULL) {
+		int start_p = haystack - page->text;
+		int end_p = start_p + search_len - 1;
+		miniexp_t start = djvu_text_page_position (page, start_p);
+		miniexp_t end = djvu_text_page_position (page, end_p);
+
+		result = djvu_text_page_box (page, start, end);
+		g_assert (result);
+		page->results = g_list_prepend (page->results, result);
+		haystack = haystack + search_len;
+	}
+
+	/* Hits were prepended; restore text order. */
+	page->results = g_list_reverse (page->results);
+}
+
+
+/**
+ * djvu_text_page_prepare_search:
+ * @page: #DjvuTextPage instance
+ * @case_sensitive: do not ignore case
+ *
+ * Builds the page text and the links array from the text structure of
+ * @page so that djvu_text_page_search() can be run on it.
+ */
+void
+djvu_text_page_prepare_search (DjvuTextPage *page,
+			       gboolean      case_sensitive)
+{
+	miniexp_t root = page->text_structure;
+
+	djvu_text_page_append_text (page, root, case_sensitive, FALSE);
+}
+
+/**
+ * djvu_text_page_new:
+ * @text: S-expression of the page text
+ *
+ * Allocates a zeroed #DjvuTextPage for @text with an empty links array
+ * and the "char" and "word" symbols looked up.
+ *
+ * Returns: new #DjvuTextPage instance
+ */
+DjvuTextPage *
+djvu_text_page_new (miniexp_t text)
+{
+	DjvuTextPage *new_page = g_new0 (DjvuTextPage, 1);
+
+	new_page->text_structure = text;
+	new_page->links = g_array_new (FALSE, FALSE, sizeof (DjvuTextLink));
+	new_page->char_symbol = miniexp_symbol ("char");
+	new_page->word_symbol = miniexp_symbol ("word");
+
+	return new_page;
+}
+
+/**
+ * djvu_text_page_free:
+ * @page: #DjvuTextPage instance
+ *
+ * Releases the links array, the page text and the instance itself.
+ * The results list is left untouched.
+ */
+void
+djvu_text_page_free (DjvuTextPage *page)
+{
+	g_array_free (page->links, TRUE);
+	g_free (page->text);
+	g_free (page);
+}
diff --git a/djvu/djvu-text-page.h b/djvu/djvu-text-page.h
new file mode 100644
index 0000000..db53326
--- /dev/null
+++ b/djvu/djvu-text-page.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2006 Michael Hofmann <mh21@piware.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __DJVU_TEXT_PAGE_H__
+#define __DJVU_TEXT_PAGE_H__
+
+#include "ev-document.h"
+
+#include <string.h>
+#include <glib.h>
+
+#include <libdjvu/ddjvuapi.h>
+
+typedef struct _DjvuTextPage DjvuTextPage;
+typedef struct _DjvuTextLink DjvuTextLink;
+
+/* Working state for copying from or searching in the text of one page. */
+struct _DjvuTextPage {
+ /* Text accumulated so far, or NULL. */
+ char *text;
+ /* Array of DjvuTextLink, filled while preparing a search. */
+ GArray *links;
+ /* List of EvRectangle search hits; not freed by djvu_text_page_free(). */
+ GList *results;
+ /* Symbols "char" and "word", looked up in djvu_text_page_new(). */
+ miniexp_t char_symbol;
+ miniexp_t word_symbol;
+ /* Rectangle under construction while a result box is computed. */
+ EvRectangle *bounding_box;
+ /* S-expression passed to djvu_text_page_new(). */
+ miniexp_t text_structure;
+ /* First and last s-expression of the selection being copied. */
+ miniexp_t start;
+ miniexp_t end;
+};
+
+/* Associates an offset in the page text with an s-expression. */
+struct _DjvuTextLink {
+ /* Length of the page text at the time the link was added. */
+ int position;
+ /* S-expression that contains the string appended at that offset. */
+ miniexp_t pair;
+};
+
+/* Returns the text overlapping @rectangle; the caller owns the string. */
+char * djvu_text_page_copy (DjvuTextPage *page,
+ EvRectangle *rectangle);
+/* Builds the page text and the links array; call before searching. */
+void djvu_text_page_prepare_search (DjvuTextPage *page,
+ gboolean case_sensitive);
+/* Fills page->results with one EvRectangle per occurrence of @text. */
+void djvu_text_page_search (DjvuTextPage *page,
+ char *text);
+/* Creates a page for the given s-expression. */
+DjvuTextPage* djvu_text_page_new (miniexp_t text);
+/* Frees the page but not its results list. */
+void djvu_text_page_free (DjvuTextPage *page);
+
+#endif /* __DJVU_TEXT_PAGE_H__ */
+
diff --git a/djvu/djvu-text.c b/djvu/djvu-text.c
new file mode 100644
index 0000000..beaac6b
--- /dev/null
+++ b/djvu/djvu-text.c
@@ -0,0 +1,298 @@
+/*
+ * Implements search and copy functionality for Djvu files.
+ * Copyright (C) 2006 Michael Hofmann <mh21@piware.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include "djvu-document-private.h"
+#include "djvu-document.h"
+#include "djvu-text.h"
+#include "djvu-text-page.h"
+#include "ev-document-find.h"
+#include "ev-document.h"
+
+#include <string.h>
+#include <glib.h>
+
+/* State of one search over a whole document. */
+struct _DjvuText {
+ DjvuDocument *document;
+ gboolean case_sensitive;
+ /* Search text; case-folded when the search is not case sensitive. */
+ char *text;
+ /* One list of EvRectangle hits per page, indexed by page number. */
+ GList **pages;
+ /* Id of the idle source, or 0 once all pages have been searched. */
+ guint idle;
+ /* Page the search started on. */
+ int start_page;
+ /* Next page to be searched. */
+ int search_page;
+};
+
+/**
+ * djvu_text_idle_callback:
+ * @data: #DjvuText instance
+ *
+ * Idle callback that processes one page at a time: it fetches the text
+ * of the current search page, searches it, stores the hits and advances
+ * to the next page, wrapping around at the end of the document.
+ *
+ * Returns: whether there are more pages to be processed
+ */
+static gboolean
+djvu_text_idle_callback (void *data)
+{
+ DjvuText *djvu_text = (DjvuText *) data;
+ DjvuDocument *djvu_document = djvu_text->document;
+ int n_pages;
+ miniexp_t page_text;
+
+ ev_document_doc_mutex_lock ();
+ /* miniexp_dummy means the text is not available yet: handle
+ * DjvuLibre messages and ask again. */
+ while ((page_text =
+ ddjvu_document_get_pagetext (djvu_document->d_document,
+ djvu_text->search_page,
+ "char")) == miniexp_dummy)
+ djvu_handle_events (djvu_document, TRUE);
+
+ if (page_text != miniexp_nil) {
+ DjvuTextPage *page = djvu_text_page_new (page_text);
+ djvu_text_page_prepare_search (page, djvu_text->case_sensitive);
+ if (page->links->len > 0) {
+ djvu_text_page_search (page, djvu_text->text);
+ /* The hit list is handed over to djvu_text; the page
+ * itself is freed below. */
+ djvu_text->pages[djvu_text->search_page] = page->results;
+ ev_document_find_changed (EV_DOCUMENT_FIND
+ (djvu_document),
+ djvu_text->search_page);
+ }
+ djvu_text_page_free (page);
+ ddjvu_miniexp_release (djvu_document->d_document,
+ page_text);
+ }
+ ev_document_doc_mutex_unlock ();
+
+ n_pages =
+ djvu_document_get_n_pages (EV_DOCUMENT (djvu_text->document));
+ djvu_text->search_page += 1;
+ if (djvu_text->search_page == n_pages) {
+ /* wrap around */
+ djvu_text->search_page = 0;
+ }
+
+ /* Back at the start page means every page has been visited. */
+ if (djvu_text->search_page != djvu_text->start_page)
+ return TRUE;
+
+ /* We're done. */
+ djvu_text->idle = 0;
+ /* will return FALSE to remove */
+ return FALSE;
+}
+
+/**
+ * djvu_text_new:
+ * @djvu_document: document to search
+ * @start_page: first page to search
+ * @case_sensitive: if FALSE, @text is stored case-folded with
+ * g_utf8_casefold() so that the search ignores case
+ * @text: text to search
+ *
+ * Creates a new #DjvuText instance to enable searching. An idle call
+ * is used to process all pages starting from @start_page.
+ *
+ * Returns: newly created instance
+ */
+DjvuText *
+djvu_text_new (DjvuDocument *djvu_document,
+	       int           start_page,
+	       gboolean      case_sensitive,
+	       const char   *text)
+{
+	int n_pages = djvu_document_get_n_pages (EV_DOCUMENT (djvu_document));
+	DjvuText *search = g_new0 (DjvuText, 1);
+
+	search->document = djvu_document;
+	search->case_sensitive = case_sensitive;
+	search->start_page = start_page;
+	search->search_page = start_page;
+	search->text = case_sensitive ? g_strdup (text)
+				      : g_utf8_casefold (text, -1);
+
+	/* g_new0() already leaves every per-page list empty (NULL). */
+	search->pages = g_new0 (GList *, n_pages);
+
+	/* We add at low priority so the progress bar repaints */
+	search->idle = g_idle_add_full (G_PRIORITY_LOW,
+					djvu_text_idle_callback,
+					search, NULL);
+
+	return search;
+}
+
+/**
+ * djvu_text_copy:
+ * @djvu_document: document to copy from
+ * @page: page to copy from
+ * @rectangle: rectangle to copy
+ *
+ * Fetches the text of @page, handling DjvuLibre messages until it is
+ * available, and copies the part inside @rectangle.
+ *
+ * Returns: newly allocated text or NULL if none is available
+ */
+char *
+djvu_text_copy (DjvuDocument *djvu_document,
+		int           page,
+		EvRectangle  *rectangle)
+{
+	miniexp_t page_text;
+	DjvuTextPage *text_page;
+	char *copied;
+
+	for (;;) {
+		page_text = ddjvu_document_get_pagetext
+			(djvu_document->d_document, page, "char");
+		if (page_text != miniexp_dummy)
+			break;
+		djvu_handle_events (djvu_document, TRUE);
+	}
+
+	if (page_text == miniexp_nil)
+		return NULL;
+
+	text_page = djvu_text_page_new (page_text);
+	copied = djvu_text_page_copy (text_page, rectangle);
+	djvu_text_page_free (text_page);
+	ddjvu_miniexp_release (djvu_document->d_document, page_text);
+
+	return copied;
+}
+
+/**
+ * djvu_text_free:
+ * @djvu_text: instance to free, may be NULL
+ *
+ * Stops a search that is still running and frees the given #DjvuText
+ * instance together with its search text, the per-page result lists
+ * and the rectangles they contain. @djvu_text must not be used
+ * afterwards.
+ */
+void djvu_text_free (DjvuText * djvu_text)
+{
+	int n_pages;
+	int i;
+
+	if (djvu_text == NULL)
+		return;
+
+	/* Remove the idle source first so that the callback can never
+	 * run on freed memory. */
+	if (djvu_text->idle != 0)
+		g_source_remove (djvu_text->idle);
+
+	n_pages =
+	    djvu_document_get_n_pages (EV_DOCUMENT (djvu_text->document));
+	for (i = 0; i < n_pages; i++) {
+		g_list_foreach (djvu_text->pages[i], (GFunc) g_free, NULL);
+		g_list_free (djvu_text->pages[i]);
+	}
+
+	g_free (djvu_text->pages);
+	g_free (djvu_text->text);
+	g_free (djvu_text);
+}
+
+/**
+ * djvu_text_get_text:
+ * @djvu_text: #DjvuText instance
+ *
+ * Gives access to the stored search text, mainly so that callers can
+ * avoid creating a new instance for the same search text. The string is
+ * the one stored by djvu_text_new() and stays owned by @djvu_text.
+ *
+ * Returns: the text this instance of #DjvuText is looking for
+ */
+const char *
+djvu_text_get_text (DjvuText *djvu_text)
+{
+	const char *needle = djvu_text->text;
+
+	return needle;
+}
+
+/**
+ * djvu_text_n_results:
+ * @djvu_text: #DjvuText instance
+ * @page: page number
+ *
+ * Counts the search results stored for the given page.
+ *
+ * Returns: number of search results
+ */
+int
+djvu_text_n_results (DjvuText *djvu_text,
+		     int       page)
+{
+	GList *hits = djvu_text->pages[page];
+
+	return g_list_length (hits);
+}
+
+/**
+ * djvu_text_has_results:
+ * @djvu_text: #DjvuText instance
+ * @page: page number
+ *
+ * Tells whether at least one search result is stored for the given
+ * page. Unlike djvu_text_n_results() this does not walk the list.
+ *
+ * Returns: whether there are search results
+ */
+int
+djvu_text_has_results (DjvuText *djvu_text,
+		       int       page)
+{
+	GList *hits = djvu_text->pages[page];
+
+	return hits != NULL;
+}
+
+/**
+ * djvu_text_get_result:
+ * @djvu_text: #DjvuText instance
+ * @page: page number
+ * @n_result: result number
+ *
+ * Looks up the n-th search result of a given page. The coordinates are
+ * Djvu-specific and need to be processed to be compatible with the Evince
+ * coordinate system. The result may span several lines!
+ *
+ * Returns: the rectangle for the search result, still owned by
+ * @djvu_text, or NULL if the list has no such element
+ */
+EvRectangle *
+djvu_text_get_result (DjvuText *djvu_text,
+		      int       page,
+		      int       n_result)
+{
+	GList *hits = djvu_text->pages[page];
+
+	return (EvRectangle *) g_list_nth_data (hits, n_result);
+}
+
+/**
+ * djvu_text_get_progress:
+ * @djvu_text: #DjvuText instance
+ *
+ * Returns the fraction of pages that have been searched. The search
+ * visits the pages in order from the start page and wraps around at the
+ * end of the document, so the number of finished pages is the distance
+ * from the start page to the next page to be searched.
+ *
+ * Returns: the progress as value between 0 and 1
+ */
+double
+djvu_text_get_progress (DjvuText *djvu_text)
+{
+	int pages_done;
+	int n_pages;
+
+	n_pages =
+	    djvu_document_get_n_pages (EV_DOCUMENT (djvu_text->document));
+
+	/* The idle callback resets the source id once every page has been
+	 * visited; this distinguishes "finished" from "not started", which
+	 * both have search_page == start_page. An empty document has
+	 * nothing left to search either. */
+	if (djvu_text->idle == 0 || n_pages <= 0)
+		return 1.0;
+
+	pages_done = djvu_text->search_page - djvu_text->start_page;
+	if (pages_done < 0)
+		pages_done += n_pages;	/* the search has wrapped around */
+
+	return pages_done / (double) n_pages;
+}
+
diff --git a/djvu/djvu-text.h b/djvu/djvu-text.h
new file mode 100644
index 0000000..1ed0d0a
--- /dev/null
+++ b/djvu/djvu-text.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2006 Michael Hofmann <mh21@piware.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __DJVU_TEXT_H__
+#define __DJVU_TEXT_H__
+
+#include "ev-document.h"
+
+#include <glib.h>
+#include <libdjvu/ddjvuapi.h>
+
+typedef struct miniexp_s* miniexp_t;
+extern const miniexp_t miniexp_nil;
+extern const miniexp_t miniexp_dummy;
+
+typedef struct _DjvuText DjvuText;
+
+/* Starts a search for @text on @start_page; see djvu-text.c. */
+DjvuText *djvu_text_new (DjvuDocument *djvu_document,
+ int start_page,
+ gboolean case_sensitive,
+ const char *text);
+/* Frees a search created with djvu_text_new(). */
+void djvu_text_free (DjvuText *djvu_text);
+const char *djvu_text_get_text (DjvuText *djvu_text);
+int djvu_text_n_results (DjvuText *djvu_text,
+ int page);
+EvRectangle *djvu_text_get_result (DjvuText *djvu_text,
+ int page,
+ int n_result);
+int djvu_text_has_results (DjvuText *djvu_text,
+ int page);
+double djvu_text_get_progress (DjvuText *djvu_text);
+/* Returns the text of @page inside @rectangle, or NULL. */
+char *djvu_text_copy (DjvuDocument *djvu_document,
+ int page,
+ EvRectangle *rectangle);
+
+#endif /* __DJVU_TEXT_H__ */