From 2123bbd08ca8a9f3852832570b205a5a972a3055 Mon Sep 17 00:00:00 2001 From: Gonzalo Odiard Date: Thu, 24 Feb 2011 20:30:58 +0000 Subject: Initial implementation of text to speech in epub backend No highlight of spoken word yet. --- diff --git a/epubadapter.py b/epubadapter.py index 2b1b597..ab6eb01 100644 --- a/epubadapter.py +++ b/epubadapter.py @@ -2,6 +2,9 @@ import gobject import logging import epubview +import speech + +from cStringIO import StringIO _logger = logging.getLogger('read-activity') @@ -18,9 +21,14 @@ class EpubViewer(epubview.EpubView): activity._hbox.pack_start(self, expand=True, fill=True) self.show_all() + # text to speech initialization + self.current_word = 0 + self.word_tuples = [] def load_document(self, file_path): self.set_document(EpubDocument(self, file_path.replace('file://', ''))) + speech.highlight_cb = self.highlight_next_word + speech.end_text_cb = self.get_more_text def load_metadata(self, activity): @@ -50,7 +58,32 @@ class EpubViewer(epubview.EpubView): return False def can_do_text_to_speech(self): - return False + return True + + def get_marked_words(self): + "Adds a mark between each word of text." + i = self.current_word + file_str = StringIO() + file_str.write(' ') + end_range = i + 40 + if end_range > len(self.word_tuples): + end_range = len(self.word_tuples) + for word_tuple in self.word_tuples[self.current_word:end_range]: + file_str.write('' + word_tuple[2]) + i = i + 1 + file_str.write('') + return file_str.getvalue() + + def get_more_text(self): + self.current_word = self.current_word + 1 + if self.current_word < len(self.word_tuples): + speech.stop() + more_text = self.get_marked_words() + speech.play(more_text) + + def highlight_next_word(self, word_count): + self.current_word = word_count + return True def connect_zoom_handler(self, handler): self._zoom_handler = handler diff --git a/epubview/epubview.py b/epubview/epubview.py index 9ebb319..4d3a130 100644 --- a/epubview/epubview.py +++ b/epubview/epubview.py @@ -22,6 +22,7 @@ import widgets import os.path import math import shutil +import BeautifulSoup from epub import _Epub from jobs import _JobPaginator as _Paginator @@ -423,6 +424,38 @@ class _View(gtk.HBox): if pageno != self._loaded_page: self._on_page_changed(0, int(pageno)) + # prepare text to speech + html_file = open(self._loaded_filename) + soup = BeautifulSoup.BeautifulSoup(html_file) + body = soup.find('body') + tags = body.findAll(text=True) + self._all_text = ''.join([tag for tag in tags]) + self._prepare_text_to_speech(self._all_text) + + def _prepare_text_to_speech(self, page_text): + i = 0 + j = 0 + word_begin = 0 + word_end = 0 + ignore_chars = [' ', '\n', u'\r', '_', '[', '{', ']', '}', '|', + '<', '>', '*', '+', '/', '\\'] + ignore_set = set(ignore_chars) + self.word_tuples = [] + len_page_text = len(page_text) + while i < len_page_text: + if page_text[i] not in ignore_set: + word_begin = i + j = i + while j < len_page_text and page_text[j] not in ignore_set: + j = j + 1 + word_end = j + i = j + word_tuple = (word_begin, word_end, + page_text[word_begin: word_end]) + if word_tuple[2] != u'\r': + self.word_tuples.append(word_tuple) + i = i + 1 + def _scroll_page_end(self): v_upper = self._v_vscrollbar.props.adjustment.props.upper v_page_size = self._v_vscrollbar.props.adjustment.props.page_size diff --git a/speech.py b/speech.py index 3197857..d950fbd 100644 --- a/speech.py +++ b/speech.py @@ -40,4 +40,5 @@ pitch = 0 rate = 0 highlight_cb = None +end_text_cb = None reset_cb = None diff --git a/speech_gst.py b/speech_gst.py index 4627c75..329f8d3 100644 --- a/speech_gst.py +++ b/speech_gst.py @@ -23,7 +23,11 @@ _logger = logging.getLogger('read-etexts-activity') def _message_cb(bus, message, pipe): - if message.type in (gst.MESSAGE_EOS, gst.MESSAGE_ERROR): + if message.type == gst.MESSAGE_EOS: + pipe.set_state(gst.STATE_NULL) + if speech.end_text_cb != None: + speech.end_text_cb() + if message.type == gst.MESSAGE_ERROR: pipe.set_state(gst.STATE_NULL) if pipe is play_speaker[1]: speech.reset_cb() -- cgit v0.9.1