From 2fcdb759f53fee8ccca7eaa3d67743cc8d42b0fc Mon Sep 17 00:00:00 2001 From: Nickolay V. Shmyrev Date: Sun, 19 Feb 2006 20:22:57 +0000 Subject: Text search in djvu. Fix for the bug #303415. * configure.ac: * djvu/Makefile.am: * djvu/djvu-document-private.h: * djvu/djvu-document.c: (G_DEFINE_TYPE_WITH_CODE), (djvu_handle_events), (djvu_document_load), (djvu_document_save), (djvu_document_get_page_size), (djvu_document_render_pixbuf), (djvu_document_can_get_text), (djvu_document_get_text), (djvu_document_document_iface_init), (djvu_document_thumbnails_get_thumbnail), (djvu_document_find_begin), (djvu_document_find_get_n_results), (djvu_document_find_get_result), (djvu_document_find_page_has_results), (djvu_document_find_get_progress), (djvu_document_find_cancel), (djvu_document_find_iface_init): * djvu/djvu-text-page.c: (djvu_text_page_selection_process), (djvu_text_page_selection), (djvu_text_page_limits_process), (djvu_text_page_limits), (djvu_text_page_copy), (djvu_text_page_position), (djvu_text_page_union), (djvu_text_page_sexpr_process), (djvu_text_page_sexpr), (djvu_text_page_box), (djvu_text_page_append_text), (djvu_text_page_search), (djvu_text_page_prepare_search), (djvu_text_page_new), (djvu_text_page_free): * djvu/djvu-text-page.h: * djvu/djvu-text.c: (djvu_text_idle_callback), (djvu_text_new), (djvu_text_copy), (djvu_text_free), (djvu_text_get_text), (djvu_text_n_results), (djvu_text_has_results), (djvu_text_get_result), (djvu_text_get_progress): * djvu/djvu-text.h: Text search in djvu. Fix for the bug #303415. 
--- (limited to 'djvu') diff --git a/djvu/Makefile.am b/djvu/Makefile.am index 7afa523..478943f 100644 --- a/djvu/Makefile.am +++ b/djvu/Makefile.am @@ -9,7 +9,12 @@ noinst_LTLIBRARIES = libgtkdjvu.la libgtkdjvu_la_SOURCES = \ djvu-document.c \ - djvu-document.h + djvu-document.h \ + djvu-document-private.h \ + djvu-text.c \ + djvu-text.h \ + djvu-text-page.c \ + djvu-text-page.h libgtkdjvu_la_LIBADD = -lpthread -ldjvulibre diff --git a/djvu/djvu-document-private.h b/djvu/djvu-document-private.h new file mode 100644 index 0000000..3fa579f --- /dev/null +++ b/djvu/djvu-document-private.h @@ -0,0 +1,45 @@ +/* + * Declarations used throughout the djvu classes + * + * Copyright (C) 2006, Michael Hofmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __DJVU_DOCUMENT_INTERNAL_H__ +#define __DJVU_DOCUMENT_INTERNAL_H__ + +#include "djvu-document.h" +#include "djvu-text.h" + +#include + +struct _DjvuDocument { + GObject parent_instance; + + ddjvu_context_t *d_context; + ddjvu_document_t *d_document; + ddjvu_format_t *d_format; + + gchar *uri; + + DjvuText *search; +}; + +int djvu_document_get_n_pages (EvDocument *document); +void djvu_handle_events (DjvuDocument *djvu_document, + int wait); + +#endif /* __DJVU_DOCUMENT_INTERNAL_H__ */ diff --git a/djvu/djvu-document.c b/djvu/djvu-document.c index 135d81c..e50fb8f 100644 --- a/djvu/djvu-document.c +++ b/djvu/djvu-document.c @@ -18,12 +18,17 @@ */ #include "djvu-document.h" +#include "djvu-text.h" +#include "djvu-document-private.h" #include "ev-document-thumbnails.h" #include "ev-document-misc.h" +#include "ev-document-find.h" #include #include #include +#include +#include #define SCALE_FACTOR 0.2 @@ -37,33 +42,50 @@ struct _DjvuDocumentClass GObjectClass parent_class; }; -struct _DjvuDocument -{ - GObject parent_instance; - - ddjvu_context_t *d_context; - ddjvu_document_t *d_document; - ddjvu_format_t *d_format; - - gchar *uri; -}; - typedef struct _DjvuDocumentClass DjvuDocumentClass; static void djvu_document_document_iface_init (EvDocumentIface *iface); static void djvu_document_document_thumbnails_iface_init (EvDocumentThumbnailsIface *iface); +static void djvu_document_find_iface_init (EvDocumentFindIface *iface); G_DEFINE_TYPE_WITH_CODE (DjvuDocument, djvu_document, G_TYPE_OBJECT, { G_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT, djvu_document_document_iface_init); G_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_THUMBNAILS, djvu_document_document_thumbnails_iface_init) + G_IMPLEMENT_INTERFACE (EV_TYPE_DOCUMENT_FIND, djvu_document_find_iface_init); }); + +void +djvu_handle_events (DjvuDocument *djvu_document, int wait) +{ + ddjvu_context_t *ctx = djvu_document->d_context; + const ddjvu_message_t *msg; + if (!ctx) + return; + if (wait) + msg = 
ddjvu_message_wait (ctx); + while ((msg = ddjvu_message_peek (ctx))) { + switch (msg->m_any.tag) { + case DDJVU_ERROR: + g_warning ("DjvuLibre error: %s", + msg->m_error.message); + if (msg->m_error.filename) + g_warning ("DjvuLibre error: %s:%d", + msg->m_error.filename, + msg->m_error.lineno); + default: + break; + } + ddjvu_message_pop (ctx); + } +} + static gboolean djvu_document_load (EvDocument *document, - const char *uri, - GError **error) + const char *uri, + GError **error) { DjvuDocument *djvu_document = DJVU_DOCUMENT (document); ddjvu_document_t *doc; @@ -83,10 +105,8 @@ djvu_document_load (EvDocument *document, djvu_document->d_document = doc; - while (!ddjvu_document_decoding_done (djvu_document->d_document)) { - ddjvu_message_wait (djvu_document->d_context); - ddjvu_message_pop (djvu_document->d_context); - } + while (!ddjvu_document_decoding_done (djvu_document->d_document)) + djvu_handle_events(djvu_document, TRUE); g_free (djvu_document->uri); djvu_document->uri = g_strdup (uri); @@ -96,15 +116,15 @@ djvu_document_load (EvDocument *document, static gboolean djvu_document_save (EvDocument *document, - const char *uri, - GError **error) + const char *uri, + GError **error) { DjvuDocument *djvu_document = DJVU_DOCUMENT (document); return ev_xfer_uri_simple (djvu_document->uri, uri, error); } -static int +int djvu_document_get_n_pages (EvDocument *document) { DjvuDocument *djvu_document = DJVU_DOCUMENT (document); @@ -116,19 +136,17 @@ djvu_document_get_n_pages (EvDocument *document) static void djvu_document_get_page_size (EvDocument *document, - int page, - double *width, - double *height) + int page, + double *width, + double *height) { DjvuDocument *djvu_document = DJVU_DOCUMENT (document); ddjvu_pageinfo_t info; g_return_if_fail (djvu_document->d_document); - while (ddjvu_document_get_pageinfo(djvu_document->d_document, page, &info) < DDJVU_JOB_OK) { - ddjvu_message_wait (djvu_document->d_context); - ddjvu_message_pop (djvu_document->d_context); 
- } + while (ddjvu_document_get_pageinfo(djvu_document->d_document, page, &info) < DDJVU_JOB_OK) + djvu_handle_events(djvu_document, TRUE); *width = info.width * SCALE_FACTOR; *height = info.height * SCALE_FACTOR; @@ -150,10 +168,8 @@ djvu_document_render_pixbuf (EvDocument *document, d_page = ddjvu_page_create_by_pageno (djvu_document->d_document, rc->page); - while (!ddjvu_page_decoding_done (d_page)) { - ddjvu_message_wait (djvu_document->d_context); - ddjvu_message_pop (djvu_document->d_context); - } + while (!ddjvu_page_decoding_done (d_page)) + djvu_handle_events(djvu_document, TRUE); page_width = ddjvu_page_get_width (d_page) * rc->scale * SCALE_FACTOR; page_height = ddjvu_page_get_height (d_page) * rc->scale * SCALE_FACTOR; @@ -203,7 +219,30 @@ djvu_document_class_init (DjvuDocumentClass *klass) static gboolean djvu_document_can_get_text (EvDocument *document) { - return FALSE; + return TRUE; +} + + +static char * +djvu_document_get_text (EvDocument *document, int page, EvRectangle *rect) +{ + DjvuDocument *djvu_document = DJVU_DOCUMENT (document); + double width, height; + EvRectangle rectangle; + char* text; + + djvu_document_get_page_size (document, page, &width, &height); + rectangle.x1 = rect->x1 / SCALE_FACTOR; + rectangle.y1 = (height - rect->y2) / SCALE_FACTOR; + rectangle.x2 = rect->x2 / SCALE_FACTOR; + rectangle.y2 = (height - rect->y1) / SCALE_FACTOR; + + text = djvu_text_copy (djvu_document, page, &rectangle); + + if (text == NULL) + text = g_strdup (""); + + return text; } static EvDocumentInfo * @@ -222,6 +261,7 @@ djvu_document_document_iface_init (EvDocumentIface *iface) iface->load = djvu_document_load; iface->save = djvu_document_save; iface->can_get_text = djvu_document_can_get_text; + iface->get_text = djvu_document_get_text; iface->get_n_pages = djvu_document_get_n_pages; iface->get_page_size = djvu_document_get_page_size; iface->render_pixbuf = djvu_document_render_pixbuf; @@ -270,10 +310,8 @@ djvu_document_thumbnails_get_thumbnail 
(EvDocumentThumbnails *document, gdk_pixbuf_fill (pixbuf, 0xffffffff); pixels = gdk_pixbuf_get_pixels (pixbuf); - while (ddjvu_thumbnail_status (djvu_document->d_document, page, 1) < DDJVU_JOB_OK) { - ddjvu_message_wait (djvu_document->d_context); - ddjvu_message_pop (djvu_document->d_context); - } + while (ddjvu_thumbnail_status (djvu_document->d_document, page, 1) < DDJVU_JOB_OK) + djvu_handle_events(djvu_document, TRUE); ddjvu_thumbnail_render (djvu_document->d_document, page, &thumb_width, &thumb_height, @@ -310,3 +348,106 @@ djvu_document_init (DjvuDocument *djvu_document) djvu_document->d_document = NULL; } +static void +djvu_document_find_begin (EvDocumentFind *document, + int page, + const char *search_string, + gboolean case_sensitive) +{ + DjvuDocument *djvu_document = DJVU_DOCUMENT (document); + + if (djvu_document->search && + strcmp (search_string, djvu_text_get_text (djvu_document->search)) == 0) + return; + + if (djvu_document->search) + djvu_text_free (djvu_document->search); + + djvu_document->search = djvu_text_new (djvu_document, + page, + case_sensitive, + search_string); +} + +static int +djvu_document_find_get_n_results (EvDocumentFind *document_find, int page) +{ + DjvuText *search = DJVU_DOCUMENT (document_find)->search; + + if (search) { + return djvu_text_n_results (search, page); + } else { + return 0; + } +} + +static gboolean +djvu_document_find_get_result (EvDocumentFind *document_find, + int page, + int n_result, + EvRectangle *rectangle) +{ + DjvuDocument *djvu_document = DJVU_DOCUMENT (document_find); + DjvuText *search = djvu_document->search; + EvRectangle *r; + double width, height; + + if (search == NULL) + return FALSE; + + r = djvu_text_get_result (search, page, n_result); + if (r == NULL) + return FALSE; + + djvu_document_get_page_size (EV_DOCUMENT (djvu_document), + page, &width, &height); + rectangle->x1 = r->x1 * SCALE_FACTOR; + rectangle->y1 = height - r->y2 * SCALE_FACTOR; + rectangle->x2 = r->x2 * SCALE_FACTOR; + 
rectangle->y2 = height - r->y1 * SCALE_FACTOR; + + return TRUE; +} + +static int +djvu_document_find_page_has_results (EvDocumentFind *document_find, + int page) +{ + DjvuText *search = DJVU_DOCUMENT (document_find)->search; + + return search && djvu_text_has_results (search, page); +} + +static double +djvu_document_find_get_progress (EvDocumentFind *document_find) +{ + DjvuText *search = DJVU_DOCUMENT (document_find)->search; + + if (search == NULL) { + return 0; + } + + return djvu_text_get_progress (search); +} + +static void +djvu_document_find_cancel (EvDocumentFind *document) +{ + DjvuDocument *djvu_document = DJVU_DOCUMENT (document); + + if (djvu_document->search) { + djvu_text_free (djvu_document->search); + djvu_document->search = NULL; + } +} + +static void +djvu_document_find_iface_init (EvDocumentFindIface *iface) +{ + iface->begin = djvu_document_find_begin; + iface->get_n_results = djvu_document_find_get_n_results; + iface->get_result = djvu_document_find_get_result; + iface->page_has_results = djvu_document_find_page_has_results; + iface->get_progress = djvu_document_find_get_progress; + iface->cancel = djvu_document_find_cancel; +} diff --git a/djvu/djvu-text-page.c b/djvu/djvu-text-page.c new file mode 100644 index 0000000..c19d6f6 --- /dev/null +++ b/djvu/djvu-text-page.c @@ -0,0 +1,444 @@ +/* + * Implements search and copy functionality for Djvu files. + * Copyright (C) 2006 Michael Hofmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include "djvu-text-page.h" + +#include + +/** + * djvu_text_page_selection_process: + * @page: #DjvuTextPage instance + * @p: s-expression to append + * @delimit: character/word/... delimiter + * + * Appends the string in @p to the page text. + * + * Returns: whether the end was not reached in this s-expression + */ +static gboolean +djvu_text_page_selection_process (DjvuTextPage *page, + miniexp_t p, + int delimit) +{ + if (page->text || p == page->start) { + char *token_text = (char *) miniexp_to_str (miniexp_nth (5, p)); + if (page->text) { + char *new_text = + g_strjoin (delimit & 2 ? "\n" : + delimit & 1 ? " " : NULL, + page->text, token_text, + NULL); + g_free (page->text); + page->text = new_text; + } else + page->text = g_strdup (token_text); + if (p == page->end) + return FALSE; + } + return TRUE; +} + +/** + * djvu_text_page_selection: + * @page: #DjvuTextPage instance + * @p: tree to append + * @delimit: character/word/... delimiter + * + * Walks the tree in @p and appends the text with + * djvu_text_page_selection_process() for all s-expressions + * between the start and end fields. + * + * Returns: whether the end was not reached in this subtree + */ +static gboolean +djvu_text_page_selection (DjvuTextPage *page, + miniexp_t p, + int delimit) +{ + g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp + (miniexp_car (p)), FALSE); + + if (miniexp_car (p) != page->char_symbol) + delimit |= miniexp_car (p) == page->word_symbol ? 
1 : 2; + + miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p)); + while (deeper != miniexp_nil) { + miniexp_t str = miniexp_car (deeper); + if (miniexp_stringp (str)) { + if (!djvu_text_page_selection_process + (page, p, delimit)) + return FALSE; + } else { + if (!djvu_text_page_selection + (page, str, delimit)) + return FALSE; + } + delimit = 0; + deeper = miniexp_cdr (deeper); + } + return TRUE; +} + +static void +djvu_text_page_limits_process (DjvuTextPage *page, + miniexp_t p, + EvRectangle *rect) +{ + EvRectangle current; + + current.x1 = miniexp_to_int (miniexp_nth (1, p)); + current.y1 = miniexp_to_int (miniexp_nth (2, p)); + current.x2 = miniexp_to_int (miniexp_nth (3, p)); + current.y2 = miniexp_to_int (miniexp_nth (4, p)); + if (current.x2 >= rect->x1 && current.y1 <= rect->y2 && + current.x1 <= rect->x2 && current.y2 >= rect->y1) { + if (page->start == miniexp_nil) + page->start = p; + page->end = p; + } +} + + +static void +djvu_text_page_limits (DjvuTextPage *page, + miniexp_t p, + EvRectangle *rect) +{ + char *token_text; + + g_return_if_fail (miniexp_consp (p) && + miniexp_symbolp (miniexp_car (p))); + + miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p)); + while (deeper != miniexp_nil) { + miniexp_t str = miniexp_car (deeper); + if (miniexp_stringp (str)) + djvu_text_page_limits_process (page, p, rect); + else + djvu_text_page_limits (page, str, rect); + + deeper = miniexp_cdr (deeper); + } +} + +char * +djvu_text_page_copy (DjvuTextPage *page, + EvRectangle *rectangle) +{ + char* text; + + page->start = miniexp_nil; + page->end = miniexp_nil; + djvu_text_page_limits (page, page->text_structure, rectangle); + djvu_text_page_selection (page, page->text_structure, 0); + + /* Do not free the string */ + text = page->text; + page->text = NULL; + + return text; +} + +/** + * djvu_text_page_position: + * @page: #DjvuTextPage instance + * @position: index in the page text + * + * Returns the closest s-expression that contains the given position in + * the 
page text. + * + * Returns: closest s-expression + */ +static miniexp_t +djvu_text_page_position (DjvuTextPage *page, + int position) +{ + GArray *links = page->links; + int low = 0; + int hi = links->len - 1; + int mid = 0; + + g_return_val_if_fail (hi >= 0, miniexp_nil); + + /* Binary search over the links array; shamelessly copied from GNU classpath */ + while (low <= hi) { + mid = (low + hi) >> 1; + DjvuTextLink *link = + &g_array_index (links, DjvuTextLink, mid); + if (link->position == position) + break; + else if (link->position > position) + hi = --mid; + else + low = mid + 1; + } + + return g_array_index (page->links, DjvuTextLink, mid).pair; +} + +/** + * djvu_text_page_union: + * @target: first rectangle and result + * @source: second rectangle + * + * Calculates the bounding box of two rectangles and stores the result + * in the first. + */ +static void +djvu_text_page_union (EvRectangle *target, + EvRectangle *source) +{ + if (source->x1 < target->x1) + target->x1 = source->x1; + if (source->x2 > target->x2) + target->x2 = source->x2; + if (source->y1 < target->y1) + target->y1 = source->y1; + if (source->y2 > target->y2) + target->y2 = source->y2; +} + +/** + * djvu_text_page_sexpr_process: + * @page: #DjvuTextPage instance + * @p: s-expression to append + * @start: first s-expression in the selection + * @end: last s-expression in the selection + * + * Appends the rectangle defined by @p to the internal bounding box rectangle. 
+ * + * Returns: whether the end was not reached in this s-expression + */ +static gboolean +djvu_text_page_sexpr_process (DjvuTextPage *page, + miniexp_t p, + miniexp_t start, + miniexp_t end) +{ + if (page->bounding_box || p == start) { + EvRectangle *new_rectangle = g_new (EvRectangle, 1); + new_rectangle->x1 = miniexp_to_int (miniexp_nth (1, p)); + new_rectangle->y1 = miniexp_to_int (miniexp_nth (2, p)); + new_rectangle->x2 = miniexp_to_int (miniexp_nth (3, p)); + new_rectangle->y2 = miniexp_to_int (miniexp_nth (4, p)); + if (page->bounding_box) { + djvu_text_page_union (page->bounding_box, + new_rectangle); + g_free (new_rectangle); + } else + page->bounding_box = new_rectangle; + if (p == end) + return FALSE; + } + return TRUE; +} + +/** + * djvu_text_page_sexpr: + * @page: #DjvuTextPage instance + * @p: tree to append + * @start: first s-expression in the selection + * @end: last s-expression in the selection + * + * Walks the tree in @p and extends the rectangle with + * djvu_text_page_sexpr_process() for all s-expressions between @start and @end. + * + * Returns: whether the end was not reached in this subtree + */ +static gboolean +djvu_text_page_sexpr (DjvuTextPage *page, + miniexp_t p, + miniexp_t start, + miniexp_t end) +{ + g_return_val_if_fail (miniexp_consp (p) && miniexp_symbolp + (miniexp_car (p)), FALSE); + + miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p)); + while (deeper != miniexp_nil) { + miniexp_t str = miniexp_car (deeper); + if (miniexp_stringp (str)) { + if (!djvu_text_page_sexpr_process + (page, p, start, end)) + return FALSE; + } else { + if (!djvu_text_page_sexpr + (page, str, start, end)) + return FALSE; + } + deeper = miniexp_cdr (deeper); + } + return TRUE; +} + +/** + * djvu_text_page_box: + * @page: #DjvuTextPage instance + * @start: first s-expression in the selection + * @end: last s-expression in the selection + * + * Builds a rectangle that contains all s-expressions in the given range. 
+ */ +static EvRectangle * +djvu_text_page_box (DjvuTextPage *page, + miniexp_t start, + miniexp_t end) +{ + page->bounding_box = NULL; + djvu_text_page_sexpr (page, page->text_structure, start, end); + return page->bounding_box; +} + +/** + * djvu_text_page_append_text: + * @page: #DjvuTextPage instance + * @p: tree to append + * @case_sensitive: do not ignore case + * @delimit: insert spaces because of higher (sentence/paragraph/...) break + * + * Appends the tree in @p to the internal text string. For every string + * leaf a #DjvuTextLink is recorded that maps the length of the page text + * before the append to the s-expression @p owning that leaf. + */ +static void +djvu_text_page_append_text (DjvuTextPage *page, + miniexp_t p, + gboolean case_sensitive, + gboolean delimit) +{ + char *token_text; + + g_return_if_fail (miniexp_consp (p) && + miniexp_symbolp (miniexp_car (p))); + + delimit |= page->char_symbol != miniexp_car (p); + + miniexp_t deeper = miniexp_cddr (miniexp_cdddr (p)); + while (deeper != miniexp_nil) { + miniexp_t data = miniexp_car (deeper); + if (miniexp_stringp (data)) { + DjvuTextLink link; + link.position = page->text == NULL ? 0 : + strlen (page->text); + link.pair = p; + g_array_append_val (page->links, link); + + token_text = (char *) miniexp_to_str (data); + if (!case_sensitive) + token_text = g_utf8_casefold (token_text, -1); + if (page->text == NULL) + page->text = g_strdup (token_text); + else { + char *new_text = + g_strjoin (delimit ? " " : NULL, + page->text, token_text, + NULL); + g_free (page->text); + page->text = new_text; + } + if (!case_sensitive) + g_free (token_text); + } else + djvu_text_page_append_text (page, data, + case_sensitive, delimit); + delimit = FALSE; + deeper = miniexp_cdr (deeper); + } +} + +/** + * djvu_text_page_search: + * @page: #DjvuTextPage instance + * @text: text to search + * + * Searches the page for the given text. The results list has to be + * externally freed afterwards. 
+ */ +void +djvu_text_page_search (DjvuTextPage *page, + char *text) +{ + char *haystack = page->text; + int search_len; + EvRectangle *result; + if (page->links->len == 0) + return; + + search_len = strlen (text); + /* An empty needle matches at every offset without ever advancing the + * haystack and would look up position -1, so there is nothing to find. */ + if (search_len == 0) + return; + + while ((haystack = strstr (haystack, text)) != NULL) { + /* Map the first and last byte of the match back to the + * s-expressions they came from and box everything in between. */ + int start_p = haystack - page->text; + miniexp_t start = djvu_text_page_position (page, start_p); + int end_p = start_p + search_len - 1; + miniexp_t end = djvu_text_page_position (page, end_p); + result = djvu_text_page_box (page, start, end); + g_assert (result); + page->results = g_list_prepend (page->results, result); + haystack = haystack + search_len; + } + page->results = g_list_reverse (page->results); +} + + +/** + * djvu_text_page_prepare_search: + * @page: #DjvuTextPage instance + * @case_sensitive: do not ignore case + * + * Indexes the page text and prepares the page for subsequent searches. + */ +void +djvu_text_page_prepare_search (DjvuTextPage *page, + gboolean case_sensitive) +{ + djvu_text_page_append_text (page, page->text_structure, + case_sensitive, FALSE); +} + +/** + * djvu_text_page_new: + * @text: S-expression of the page text + * + * Creates a new page to search. + * + * Returns: new #DjvuTextPage instance + */ +DjvuTextPage * +djvu_text_page_new (miniexp_t text) +{ + DjvuTextPage *page; + + page = g_new0 (DjvuTextPage, 1); + page->links = g_array_new (FALSE, FALSE, sizeof (DjvuTextLink)); + page->char_symbol = miniexp_symbol ("char"); + page->word_symbol = miniexp_symbol ("word"); + page->text_structure = text; + return page; +} + +/** + * djvu_text_page_free: + * @page: #DjvuTextPage instance + * + * Frees the given #DjvuTextPage instance. 
+ */ +void +djvu_text_page_free (DjvuTextPage *page) +{ + g_free (page->text); + g_array_free (page->links, TRUE); + g_free (page); +} diff --git a/djvu/djvu-text-page.h b/djvu/djvu-text-page.h new file mode 100644 index 0000000..db53326 --- /dev/null +++ b/djvu/djvu-text-page.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2006 Michael Hofmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __DJVU_TEXT_PAGE_H__ +#define __DJVU_TEXT_PAGE_H__ + +#include "ev-document.h" + +#include +#include + +#include + +typedef struct _DjvuTextPage DjvuTextPage; +typedef struct _DjvuTextLink DjvuTextLink; + +struct _DjvuTextPage { + char *text; + GArray *links; + GList *results; + miniexp_t char_symbol; + miniexp_t word_symbol; + EvRectangle *bounding_box; + miniexp_t text_structure; + miniexp_t start; + miniexp_t end; +}; + +struct _DjvuTextLink { + int position; + miniexp_t pair; +}; + +char * djvu_text_page_copy (DjvuTextPage *page, + EvRectangle *rectangle); +void djvu_text_page_prepare_search (DjvuTextPage *page, + gboolean case_sensitive); +void djvu_text_page_search (DjvuTextPage *page, + char *text); +DjvuTextPage* djvu_text_page_new (miniexp_t text); +void djvu_text_page_free (DjvuTextPage *page); + +#endif /* __DJVU_TEXT_PAGE_H__ */ + diff --git a/djvu/djvu-text.c b/djvu/djvu-text.c new file mode 100644 index 0000000..beaac6b --- /dev/null +++ b/djvu/djvu-text.c @@ -0,0 +1,298 @@ +/* + * Implements search and copy functionality for Djvu files. + * Copyright (C) 2006 Michael Hofmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include "djvu-document-private.h" +#include "djvu-document.h" +#include "djvu-text.h" +#include "djvu-text-page.h" +#include "ev-document-find.h" +#include "ev-document.h" + +#include +#include + +struct _DjvuText { + DjvuDocument *document; + gboolean case_sensitive; + char *text; + GList **pages; + guint idle; + int start_page; + int search_page; +}; + +/** + * djvu_text_idle_callback: + * @data: #DjvuText instance + * + * Idle callback that processes one page at a time. + * + * Returns: whether there are more pages to be processed + */ +static gboolean +djvu_text_idle_callback (void *data) +{ + DjvuText *djvu_text = (DjvuText *) data; + DjvuDocument *djvu_document = djvu_text->document; + int n_pages; + miniexp_t page_text; + + ev_document_doc_mutex_lock (); + while ((page_text = + ddjvu_document_get_pagetext (djvu_document->d_document, + djvu_text->search_page, + "char")) == miniexp_dummy) + djvu_handle_events (djvu_document, TRUE); + + if (page_text != miniexp_nil) { + DjvuTextPage *page = djvu_text_page_new (page_text); + djvu_text_page_prepare_search (page, djvu_text->case_sensitive); + if (page->links->len > 0) { + djvu_text_page_search (page, djvu_text->text); + djvu_text->pages[djvu_text->search_page] = page->results; + ev_document_find_changed (EV_DOCUMENT_FIND + (djvu_document), + djvu_text->search_page); + } + djvu_text_page_free (page); + ddjvu_miniexp_release (djvu_document->d_document, + page_text); + } + ev_document_doc_mutex_unlock (); + + n_pages = + djvu_document_get_n_pages (EV_DOCUMENT (djvu_text->document)); + djvu_text->search_page += 1; + if (djvu_text->search_page == n_pages) { + /* wrap around */ + djvu_text->search_page = 0; + } + + if (djvu_text->search_page != djvu_text->start_page) + return TRUE; + + /* We're done. 
*/ + djvu_text->idle = 0; + /* will return FALSE to remove */ + return FALSE; +} + +/** + * djvu_text_new: + * @djvu_document: document to search + * @start_page: first page to search + * @case_sensitive: uses g_utf8_case_fold() to enable case-insensitive + * searching + * @text: text to search + * + * Creates a new #DjvuText instance to enable searching. An idle call + * is used to process all pages starting from @start_page. + * + * Returns: newly created instance + */ +DjvuText * +djvu_text_new (DjvuDocument *djvu_document, + int start_page, + gboolean case_sensitive, + const char *text) +{ + DjvuText *djvu_text; + int n_pages; + int i; + + n_pages = djvu_document_get_n_pages (EV_DOCUMENT (djvu_document)); + + djvu_text = g_new0 (DjvuText, 1); + + if (case_sensitive) + djvu_text->text = g_strdup (text); + else + djvu_text->text = g_utf8_casefold (text, -1); + djvu_text->pages = g_new0 (GList *, n_pages); + for (i = 0; i < n_pages; i++) { + djvu_text->pages[i] = NULL; + } + + djvu_text->document = djvu_document; + + /* We add at low priority so the progress bar repaints */ + djvu_text->idle = g_idle_add_full (G_PRIORITY_LOW, + djvu_text_idle_callback, + djvu_text, NULL); + + djvu_text->case_sensitive = case_sensitive; + djvu_text->start_page = start_page; + djvu_text->search_page = start_page; + + return djvu_text; +} + +/** + * djvu_text_copy: + * @djvu_document: document to search + * @page: page to search + * @rectangle: rectangle to copy + * + * Copies and returns the text in the given rectangle. 
+ * + * Returns: newly allocated text or NULL if none is available + */ +char * +djvu_text_copy (DjvuDocument *djvu_document, + int page, + EvRectangle *rectangle) +{ + miniexp_t page_text; + char* text = NULL; + + while ((page_text = + ddjvu_document_get_pagetext (djvu_document->d_document, + page, "char")) == miniexp_dummy) + djvu_handle_events (djvu_document, TRUE); + + if (page_text != miniexp_nil) { + /* Named text_page so it does not shadow the @page number. */ + DjvuTextPage *text_page = djvu_text_page_new (page_text); + text = djvu_text_page_copy (text_page, rectangle); + djvu_text_page_free (text_page); + ddjvu_miniexp_release (djvu_document->d_document, page_text); + } + + return text; +} + +/** + * djvu_text_free: + * @djvu_text: instance to free + * + * Removes the pending idle search (if any) and frees the given #DjvuText + * instance together with all stored result rectangles. @djvu_text must + * not be used after this call. + */ +void djvu_text_free (DjvuText * djvu_text) +{ + DjvuDocument *djvu_document = djvu_text->document; + int n_pages; + int i; + + if (djvu_text->idle != 0) + g_source_remove (djvu_text->idle); + + n_pages = djvu_document_get_n_pages (EV_DOCUMENT (djvu_document)); + for (i = 0; i < n_pages; i++) { + g_list_foreach (djvu_text->pages[i], (GFunc) g_free, NULL); + g_list_free (djvu_text->pages[i]); + } + + g_free (djvu_text->text); + /* The per-page array and the instance itself were allocated in + * djvu_text_new(); release them too so nothing is leaked. */ + g_free (djvu_text->pages); + g_free (djvu_text); +} + +/** + * djvu_text_get_text: + * @djvu_text: #DjvuText instance + * + * Returns the search text. This is mainly to be able to avoid reinstantiation + * for the same search text. Note that for case-insensitive searches this is + * the casefolded form of the text passed to djvu_text_new(), not the + * original string. 
+ * + * Returns: the text this instance of #DjvuText is looking for + */ +const char * +djvu_text_get_text (DjvuText *djvu_text) +{ + return djvu_text->text; +} + +/** + * djvu_text_n_results: + * @djvu_text: #DjvuText instance + * @page: page number + * + * Returns the number of search results available for the given page. 
+ * + * Returns: number of search results + */ +int +djvu_text_n_results (DjvuText *djvu_text, + int page) +{ + return g_list_length (djvu_text->pages[page]); +} + +/** + * djvu_text_has_results: + * @djvu_text: #DjvuText instance + * @page: page number + * + * Returns whether there are search results available for the given page. + * This method executes faster than djvu_text_n_results(). + * + * Returns: whether there are search results + */ +int +djvu_text_has_results (DjvuText *djvu_text, + int page) +{ + return djvu_text->pages[page] != NULL; +} + +/** + * djvu_text_get_result: + * @djvu_text: #DjvuText instance + * @page: page number + * @n_result: result number + * + * Returns the n-th search result of a given page. The coordinates are + * Djvu-specific and need to be processed to be compatible with the Evince + * coordinate system. The result may span several lines! + * + * Returns: the rectangle for the search result + */ +EvRectangle * +djvu_text_get_result (DjvuText *djvu_text, + int page, + int n_result) +{ + return (EvRectangle *) g_list_nth_data (djvu_text->pages[page], + n_result); +} + +/** + * djvu_text_get_progress: + * @djvu_text: #DjvuText instance + * + * Returns the percentage of pages done searching. 
+ * + * Returns: the progress as value between 0 and 1 + */ +double +djvu_text_get_progress (DjvuText *djvu_text) +{ + int pages_done; + int n_pages; + + n_pages = + djvu_document_get_n_pages (EV_DOCUMENT (djvu_text->document)); + if (djvu_text->search_page > djvu_text->start_page) { + pages_done = djvu_text->search_page - djvu_text->start_page + 1; + } else if (djvu_text->search_page == djvu_text->start_page) { + pages_done = n_pages; + } else { + pages_done = + n_pages - djvu_text->start_page + djvu_text->search_page; + } + return pages_done / (double) n_pages; +} + diff --git a/djvu/djvu-text.h b/djvu/djvu-text.h new file mode 100644 index 0000000..1ed0d0a --- /dev/null +++ b/djvu/djvu-text.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2006 Michael Hofmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef __DJVU_TEXT_H__ +#define __DJVU_TEXT_H__ + +#include "ev-document.h" + +#include +#include + +typedef struct miniexp_s* miniexp_t; +extern const miniexp_t miniexp_nil; +extern const miniexp_t miniexp_dummy; + +typedef struct _DjvuText DjvuText; + +DjvuText *djvu_text_new (DjvuDocument *djvu_document, + int start_page, + gboolean case_sensitive, + const char *text); +/* Declared here because djvu-document.c calls it; without the prototype the call is an implicit declaration. */ +void djvu_text_free (DjvuText *djvu_text); +const char *djvu_text_get_text (DjvuText *djvu_text); +int djvu_text_n_results (DjvuText *djvu_text, + int page); +EvRectangle *djvu_text_get_result (DjvuText *djvu_text, + int page, + int n_result); +int djvu_text_has_results (DjvuText *djvu_text, + int page); +double djvu_text_get_progress (DjvuText *djvu_text); +char *djvu_text_copy (DjvuDocument *djvu_document, + int page, + EvRectangle *rectangle); + +#endif /* __DJVU_TEXT_H__ */ -- cgit v0.9.1