From 2123bbd08ca8a9f3852832570b205a5a972a3055 Mon Sep 17 00:00:00 2001
From: Gonzalo Odiard <godiard@sugarlabs.org>
Date: Thu, 24 Feb 2011 20:30:58 +0000
Subject: Initial implementation of text to speech in epub backend

The spoken word is not highlighted yet.
---
diff --git a/epubadapter.py b/epubadapter.py
index 2b1b597..ab6eb01 100644
--- a/epubadapter.py
+++ b/epubadapter.py
@@ -2,6 +2,9 @@ import gobject
 import logging
 
 import epubview
+import speech
+
+from cStringIO import StringIO
 
 _logger = logging.getLogger('read-activity')
 
@@ -18,9 +21,14 @@ class EpubViewer(epubview.EpubView):
 
         activity._hbox.pack_start(self, expand=True, fill=True)
         self.show_all()
+        # text to speech initialization
+        self.current_word = 0
+        self.word_tuples = []
 
     def load_document(self, file_path):
         self.set_document(EpubDocument(self, file_path.replace('file://', '')))
+        speech.highlight_cb = self.highlight_next_word  # tracks current_word
+        speech.end_text_cb = self.get_more_text  # run by speech_gst on EOS
 
     def load_metadata(self, activity):
 
@@ -50,7 +58,32 @@ class EpubViewer(epubview.EpubView):
         return False
 
     def can_do_text_to_speech(self):
-        return False
+        return True  # the text to speak comes from get_marked_words()
+
+    def get_marked_words(self):
+        "Return SSML with a mark before each of the next 40 words."
+        # Pieces are joined instead of written to a cStringIO buffer,
+        # which raises UnicodeEncodeError on non-ASCII unicode words.
+        first = self.current_word
+        words = self.word_tuples[first:first + 40]
+        parts = ['<speak> ']
+        for offset, word_tuple in enumerate(words):
+            # the mark name is the word's index in self.word_tuples
+            parts.append('<mark name="' + str(first + offset) + '"/>')
+            parts.append(word_tuple[2])
+        parts.append('</speak>')
+        return ''.join(parts)
+
+    def get_more_text(self):
+        "Step past the current word and speak the next batch, if any."
+        self.current_word += 1
+        if self.current_word < len(self.word_tuples):
+            speech.stop()
+            speech.play(self.get_marked_words())
+
+    def highlight_next_word(self,  word_count):
+        self.current_word = word_count  # no highlighting yet, just track it
+        return True
 
     def connect_zoom_handler(self, handler):
         self._zoom_handler = handler
diff --git a/epubview/epubview.py b/epubview/epubview.py
index 9ebb319..4d3a130 100644
--- a/epubview/epubview.py
+++ b/epubview/epubview.py
@@ -22,6 +22,7 @@ import widgets
 import os.path
 import math
 import shutil
+import BeautifulSoup
 
 from epub import _Epub
 from jobs import _JobPaginator as _Paginator
@@ -423,6 +424,38 @@ class _View(gtk.HBox):
         if pageno != self._loaded_page:
             self._on_page_changed(0, int(pageno))
 
+        # prepare text to speech
+        html_file = open(self._loaded_filename)
+        soup = BeautifulSoup.BeautifulSoup(html_file)
+        body = soup.find('body')
+        tags = body.findAll(text=True)
+        self._all_text = ''.join([tag for tag in tags])
+        self._prepare_text_to_speech(self._all_text)
+
+    def _prepare_text_to_speech(self, page_text):
+        """Split page_text into words and store them in self.word_tuples.
+
+        Each entry is a (begin, end, word) tuple where word is
+        page_text[begin:end].  Whitespace and the punctuation listed in
+        separators delimit words and never belong to one.  Any previous
+        contents of self.word_tuples are discarded.
+        """
+        separators = set(['_', '[', '{', ']', '}', '|',
+                '<', '>', '*', '+', '/', '\\'])
+        self.word_tuples = []
+        word_begin = None
+        # the trailing space is a sentinel that closes a word ending the text
+        for pos, char in enumerate(page_text + ' '):
+            # isspace() makes tabs and other whitespace split words too
+            if not (char.isspace() or char in separators):
+                if word_begin is None:
+                    word_begin = pos
+                continue
+            if word_begin is not None:
+                word = page_text[word_begin:pos]
+                self.word_tuples.append((word_begin, pos, word))
+                word_begin = None
+
     def _scroll_page_end(self):
         v_upper = self._v_vscrollbar.props.adjustment.props.upper
         v_page_size = self._v_vscrollbar.props.adjustment.props.page_size
diff --git a/speech.py b/speech.py
index 3197857..d950fbd 100644
--- a/speech.py
+++ b/speech.py
@@ -40,4 +40,5 @@ pitch = 0
 rate = 0
 
 highlight_cb = None
+end_text_cb = None
 reset_cb = None
diff --git a/speech_gst.py b/speech_gst.py
index 4627c75..329f8d3 100644
--- a/speech_gst.py
+++ b/speech_gst.py
@@ -23,7 +23,11 @@ _logger = logging.getLogger('read-etexts-activity')
 
 
 def _message_cb(bus, message, pipe):
-    if message.type in (gst.MESSAGE_EOS, gst.MESSAGE_ERROR):
+    if message.type == gst.MESSAGE_EOS:
+        pipe.set_state(gst.STATE_NULL)
+        if speech.end_text_cb != None:
+            speech.end_text_cb()
+    if message.type == gst.MESSAGE_ERROR:
         pipe.set_state(gst.STATE_NULL)
         if pipe is play_speaker[1]:
             speech.reset_cb()
--
cgit v0.9.1