Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
diff options
context:
space:
mode:
author Gonzalo Odiard <godiard@sugarlabs.org> 2011-02-24 20:30:58 (GMT)
committer Gonzalo Odiard <godiard@sugarlabs.org> 2011-02-24 20:30:58 (GMT)
commit 2123bbd08ca8a9f3852832570b205a5a972a3055 (patch)
tree 2019e84561769109385209ee345fc33f7c471cd0
parent 9202daa7d92ada1f1536d812b0e1a62af9366192 (diff)
Initial implementation of text to speech in epub backend
No highlight of spoken word yet.
-rw-r--r-- epubadapter.py 35
-rw-r--r-- epubview/epubview.py 33
-rw-r--r-- speech.py 1
-rw-r--r-- speech_gst.py 6
4 files changed, 73 insertions, 2 deletions
diff --git a/epubadapter.py b/epubadapter.py
index 2b1b597..ab6eb01 100644
--- a/epubadapter.py
+++ b/epubadapter.py
@@ -2,6 +2,9 @@ import gobject
import logging
import epubview
+import speech
+
+from cStringIO import StringIO
_logger = logging.getLogger('read-activity')
@@ -18,9 +21,14 @@ class EpubViewer(epubview.EpubView):
activity._hbox.pack_start(self, expand=True, fill=True)
self.show_all()
+ # text to speech initialization
+ self.current_word = 0
+ self.word_tuples = []
def load_document(self, file_path):
self.set_document(EpubDocument(self, file_path.replace('file://', '')))
+ speech.highlight_cb = self.highlight_next_word
+ speech.end_text_cb = self.get_more_text
def load_metadata(self, activity):
@@ -50,7 +58,32 @@ class EpubViewer(epubview.EpubView):
return False
def can_do_text_to_speech(self):
- return False
+ return True
+
+ def get_marked_words(self):
+ "Adds a mark between each word of text."
+ i = self.current_word
+ file_str = StringIO()
+ file_str.write('<speak> ')
+ end_range = i + 40
+ if end_range > len(self.word_tuples):
+ end_range = len(self.word_tuples)
+ for word_tuple in self.word_tuples[self.current_word:end_range]:
+ file_str.write('<mark name="' + str(i) + '"/>' + word_tuple[2])
+ i = i + 1
+ file_str.write('</speak>')
+ return file_str.getvalue()
+
+ def get_more_text(self):
+ self.current_word = self.current_word + 1
+ if self.current_word < len(self.word_tuples):
+ speech.stop()
+ more_text = self.get_marked_words()
+ speech.play(more_text)
+
+ def highlight_next_word(self, word_count):
+ self.current_word = word_count
+ return True
def connect_zoom_handler(self, handler):
self._zoom_handler = handler
diff --git a/epubview/epubview.py b/epubview/epubview.py
index 9ebb319..4d3a130 100644
--- a/epubview/epubview.py
+++ b/epubview/epubview.py
@@ -22,6 +22,7 @@ import widgets
import os.path
import math
import shutil
+import BeautifulSoup
from epub import _Epub
from jobs import _JobPaginator as _Paginator
@@ -423,6 +424,38 @@ class _View(gtk.HBox):
if pageno != self._loaded_page:
self._on_page_changed(0, int(pageno))
+ # prepare text to speech
+ html_file = open(self._loaded_filename)
+ soup = BeautifulSoup.BeautifulSoup(html_file)
+ body = soup.find('body')
+ tags = body.findAll(text=True)
+ self._all_text = ''.join([tag for tag in tags])
+ self._prepare_text_to_speech(self._all_text)
+
+ def _prepare_text_to_speech(self, page_text):
+ i = 0
+ j = 0
+ word_begin = 0
+ word_end = 0
+ ignore_chars = [' ', '\n', u'\r', '_', '[', '{', ']', '}', '|',
+ '<', '>', '*', '+', '/', '\\']
+ ignore_set = set(ignore_chars)
+ self.word_tuples = []
+ len_page_text = len(page_text)
+ while i < len_page_text:
+ if page_text[i] not in ignore_set:
+ word_begin = i
+ j = i
+ while j < len_page_text and page_text[j] not in ignore_set:
+ j = j + 1
+ word_end = j
+ i = j
+ word_tuple = (word_begin, word_end,
+ page_text[word_begin: word_end])
+ if word_tuple[2] != u'\r':
+ self.word_tuples.append(word_tuple)
+ i = i + 1
+
def _scroll_page_end(self):
v_upper = self._v_vscrollbar.props.adjustment.props.upper
v_page_size = self._v_vscrollbar.props.adjustment.props.page_size
diff --git a/speech.py b/speech.py
index 3197857..d950fbd 100644
--- a/speech.py
+++ b/speech.py
@@ -40,4 +40,5 @@ pitch = 0
rate = 0
highlight_cb = None
+end_text_cb = None
reset_cb = None
diff --git a/speech_gst.py b/speech_gst.py
index 4627c75..329f8d3 100644
--- a/speech_gst.py
+++ b/speech_gst.py
@@ -23,7 +23,11 @@ _logger = logging.getLogger('read-etexts-activity')
def _message_cb(bus, message, pipe):
- if message.type in (gst.MESSAGE_EOS, gst.MESSAGE_ERROR):
+ if message.type == gst.MESSAGE_EOS:
+ pipe.set_state(gst.STATE_NULL)
+ if speech.end_text_cb != None:
+ speech.end_text_cb()
+ if message.type == gst.MESSAGE_ERROR:
pipe.set_state(gst.STATE_NULL)
if pipe is play_speaker[1]:
speech.reset_cb()