diff options
author | Gonzalo Odiard <godiard@sugarlabs.org> | 2011-02-24 20:30:58 (GMT) |
---|---|---|
committer | Gonzalo Odiard <godiard@sugarlabs.org> | 2011-02-24 20:30:58 (GMT) |
commit | 2123bbd08ca8a9f3852832570b205a5a972a3055 (patch) | |
tree | 2019e84561769109385209ee345fc33f7c471cd0 /epubview | |
parent | 9202daa7d92ada1f1536d812b0e1a62af9366192 (diff) |
Initial implementation of text to speech in epub backend
No highlight of spoken word yet.
Diffstat (limited to 'epubview')
-rw-r--r-- | epubview/epubview.py | 33 |
1 files changed, 33 insertions, 0 deletions
```diff
diff --git a/epubview/epubview.py b/epubview/epubview.py
index 9ebb319..4d3a130 100644
--- a/epubview/epubview.py
+++ b/epubview/epubview.py
@@ -22,6 +22,7 @@ import widgets
 import os.path
 import math
 import shutil
+import BeautifulSoup
 
 from epub import _Epub
 from jobs import _JobPaginator as _Paginator
@@ -423,6 +424,38 @@ class _View(gtk.HBox):
         if pageno != self._loaded_page:
             self._on_page_changed(0, int(pageno))
 
+        # prepare text to speech
+        html_file = open(self._loaded_filename)
+        soup = BeautifulSoup.BeautifulSoup(html_file)
+        body = soup.find('body')
+        tags = body.findAll(text=True)
+        self._all_text = ''.join([tag for tag in tags])
+        self._prepare_text_to_speech(self._all_text)
+
+    def _prepare_text_to_speech(self, page_text):
+        i = 0
+        j = 0
+        word_begin = 0
+        word_end = 0
+        ignore_chars = [' ', '\n', u'\r', '_', '[', '{', ']', '}', '|',
+                '<', '>', '*', '+', '/', '\\']
+        ignore_set = set(ignore_chars)
+        self.word_tuples = []
+        len_page_text = len(page_text)
+        while i < len_page_text:
+            if page_text[i] not in ignore_set:
+                word_begin = i
+                j = i
+                while j < len_page_text and page_text[j] not in ignore_set:
+                    j = j + 1
+                word_end = j
+                i = j
+                word_tuple = (word_begin, word_end,
+                        page_text[word_begin: word_end])
+                if word_tuple[2] != u'\r':
+                    self.word_tuples.append(word_tuple)
+            i = i + 1
+
     def _scroll_page_end(self):
         v_upper = self._v_vscrollbar.props.adjustment.props.upper
         v_page_size = self._v_vscrollbar.props.adjustment.props.page_size
```