Initial implementation of text to speech in epub backend

The spoken word is not highlighted yet.
author: Gonzalo Odiard <godiard@sugarlabs.org> 2011-02-24 20:30:58 (GMT)
committer: Gonzalo Odiard <godiard@sugarlabs.org> 2011-02-24 20:30:58 (GMT)
commit: 2123bbd08ca8a9f3852832570b205a5a972a3055 (patch)
tree: 2019e84561769109385209ee345fc33f7c471cd0 /epubview
parent: 9202daa7d92ada1f1536d812b0e1a62af9366192 (diff)
1 file changed, 33 insertions, 0 deletions
diff --git a/epubview/epubview.py b/epubview/epubview.py
index 9ebb319..4d3a130 100644
--- a/epubview/epubview.py
+++ b/epubview/epubview.py
@@ -22,6 +22,7 @@ import widgets
 import os.path
 import math
 import shutil
+import BeautifulSoup
 
 from epub import _Epub
 from jobs import _JobPaginator as _Paginator
@@ -423,6 +424,38 @@ class _View(gtk.HBox):
         if pageno != self._loaded_page:
             self._on_page_changed(0, int(pageno))
 
+        # prepare text to speech
+        html_file = open(self._loaded_filename)
+        soup = BeautifulSoup.BeautifulSoup(html_file)
+        body = soup.find('body')
+        tags = body.findAll(text=True)
+        self._all_text = ''.join([tag for tag in tags])
+        self._prepare_text_to_speech(self._all_text)
+
+    def _prepare_text_to_speech(self, page_text):
+        """Split page_text into words for the text to speech engine.
+
+        Fills self.word_tuples with one (begin, end, word) tuple per run
+        of characters that are not separators, where page_text[begin:end]
+        is the word.  Any previous content of the list is discarded.
+        """
+        separators = set([' ', '\n', u'\r', '_', '[', '{', ']', '}', '|',
+                          '<', '>', '*', '+', '/', '\\'])
+        self.word_tuples = []
+        text_length = len(page_text)
+        pos = 0
+        while pos < text_length:
+            if page_text[pos] in separators:
+                pos += 1
+                continue
+            # pos is on the first character of a word; find where it ends
+            word_begin = pos
+            while pos < text_length and page_text[pos] not in separators:
+                pos += 1
+            # a word never contains a separator, so it cannot be u'\r'
+            self.word_tuples.append(
+                (word_begin, pos, page_text[word_begin:pos]))
+
     def _scroll_page_end(self):
         v_upper = self._v_vscrollbar.props.adjustment.props.upper
         v_page_size = self._v_vscrollbar.props.adjustment.props.page_size
author	Gonzalo Odiard <godiard@sugarlabs.org>	2011-02-24 20:30:58 (GMT)
committer	Gonzalo Odiard <godiard@sugarlabs.org>	2011-02-24 20:30:58 (GMT)
commit	2123bbd08ca8a9f3852832570b205a5a972a3055 (patch)
tree	2019e84561769109385209ee345fc33f7c471cd0 /epubview
parent	9202daa7d92ada1f1536d812b0e1a62af9366192 (diff)