From d83c269464cf301fc71067ecf7702eefd6f5816d Mon Sep 17 00:00:00 2001
From: Aleksey Lim
Date: Thu, 05 Feb 2009 23:35:29 +0000
Subject: Switch Espeak to gst-plugins-espeak
---
diff --git a/Speak.activity/activity.py b/Speak.activity/activity.py
index ca933e4..06f3535 100755
--- a/Speak.activity/activity.py
+++ b/Speak.activity/activity.py
@@ -22,34 +22,19 @@
 # along with Speak.activity.  If not, see <http://www.gnu.org/licenses/>.
 
-import sys
-import os
-from urllib import (quote, unquote)
-import subprocess
-import random
 from sugar.activity import activity
-from sugar.datastore import datastore
 from sugar.presence import presenceservice
 import logging
 import gtk
 import gobject
 import pango
+import cjson
 from gettext import gettext as _
 
-# try:
-#     sys.path.append('/usr/lib/python2.4/site-packages') # for speechd
-#     import speechd.client
-# except:
-#     print "Speech-dispatcher not found."
-
 from sugar.graphics.toolbutton import ToolButton
 from sugar.graphics.toolcombobox import ToolComboBox
 from sugar.graphics.combobox import ComboBox
 
-import pygst
-pygst.require("0.10")
-import gst
-
 import eye
 import glasses
 import mouth
@@ -62,6 +47,8 @@
 from chat import Chat
 from collab import CollabActivity
 from messenger import Messenger, SERVICE
 
+logger = logging.getLogger('speak')
+
 CHAT_TOOLBAR = 3
 
 class SpeakActivity(CollabActivity):
@@ -163,11 +150,6 @@
         #     return True
         # gobject.timeout_add(100, poll_mouse)
 
-        # say hello to the user
-        presenceService = presenceservice.get_instance()
-        xoOwner = presenceService.get_owner()
-        self.face.say(_("Hello %s. Type something.") % xoOwner.props.nick)
-
         # XXX do it after(possible) read_file() invoking
         # have to rely on calling read_file() from map_cb in sugar-toolkit
         self.connect_after('map', self.connect_to)
@@ -181,80 +163,41 @@
         self.numeyesadj.connect("value_changed", self.eyes_changed_cb, False)
         self.eye_shape_combo.connect('changed', self.eyes_changed_cb, False)
         self.eyes_changed_cb(None, True)
+        self.face.look_ahead()
+
+        # say hello to the user
+        presenceService = presenceservice.get_instance()
+        xoOwner = presenceService.get_owner()
+        self.face.say(_("Hello %s. Type something.") % xoOwner.props.nick)
+
     def write_file(self, file_path):
-        f = open(file_path, "w")
-        f.write("speak file format v1\n")
-        f.write("voice=%s\n" % quote(self.face.status.voice.friendlyname))
-        f.write("text=%s\n" % quote(self.entry.props.text))
-        history = map(lambda i: i[0], self.entrycombo.get_model())
-        f.write("history=[%s]\n" % ",".join(map(quote, history)))
-        f.write("pitch=%d\n" % self.pitchadj.value)
-        f.write("rate=%d\n" % self.rateadj.value)
-        f.write("mouth_shape=%s\n" % quote(self.mouth_shape_combo.get_active_item()[1]))
-        f.write("eye_shape=%s\n" % quote(self.eye_shape_combo.get_active_item()[1]))
-        f.write("num_eyes=%d\n" % self.numeyesadj.value)
-        f.close()
-
-        f = open(file_path, "r")
-        print f.readlines()
-        f.close()
-
+        cfg = { 'status' : self.face.status.serialize(),
+                'text' : self.entry.props.text,
+                'history' : map(lambda i: i[0], self.entrycombo.get_model()) }
+        file(file_path, 'w').write(cjson.encode(cfg))
 
     def read_file(self, file_path):
-
-        def pick_combo_item(combo, name):
-            index = 0
-            model = combo.get_model()
-            for item in model:
-                if item[1] == name:
-                    combo.set_active(index)
-                    return True
-                index += 1
-            return False
-
-        f = open(file_path, "r")
-        header = f.readline().strip()
-        if header != "speak file format v1":
-            print "Reading format from the future '%s', will try my best." % header
-        for line in f.readlines():
-            line = line.strip()
-            index = line.find('=')
-            key = line[:index]
-            value = line[index+1:]
-            if key == 'voice':
-                voice_name = unquote(value)
-                found = pick_combo_item(self.voice_combo, voice_name)
-                if not found:
-                    print "Unrecognized voice name: %s" % voice_name
-            elif key == 'text':
-                self.entry.props.text = unquote(value)
-            elif key == 'history':
-                if value[0]=='[' and value[-1]==']':
-                    for item in value[1:-1].split(','):
-                        self.entrycombo.append_text(unquote(item))
-                else:
-                    print "Unrecognized history: %s" % value
-            elif key == 'pitch':
-                self.pitchadj.value = int(value)
-            elif key == 'rate':
-                self.rateadj.value = int(value)
-            elif key == 'mouth_shape':
-                mouth_name = unquote(value)
-                found = pick_combo_item(self.mouth_shape_combo, mouth_name)
-                if not found:
-                    print "Unrecognized mouth shape: %s" % mouth_name
-            elif key == 'eye_shape':
-                eye_name = unquote(value)
-                found = pick_combo_item(self.eye_shape_combo, eye_name)
-                if not found:
-                    print "Unrecognized eye shape: %s" % eye_name
-            elif key == 'num_eyes':
-                self.numeyesadj.value = int(value)
-            else:
-                print "Ignoring unrecognized line: %s" % line
-        f.close()
+        cfg = cjson.decode(file(file_path, 'r').read())
+
+        def pick_combo_item(combo, col, obj):
+            for i, item in enumerate(combo.get_model()):
+                if item[col] == obj:
+                    combo.set_active(i)
+                    return
+            logger.warning("Unrecognized loaded value: %s" % obj)
+
+        status = self.face.status = face.Status().deserialize(cfg['status'])
+        pick_combo_item(self.voice_combo, 1, status.voice.friendlyname)
+        self.pitchadj.value = self.face.status.pitch
+        self.rateadj.value = self.face.status.rate
+        pick_combo_item(self.mouth_shape_combo, 0, status.mouth)
+        pick_combo_item(self.eye_shape_combo, 0, status.eyes[0])
+        self.numeyesadj.value = len(status.eyes)
+
+        self.entry.props.text = cfg['text']
+        for i in cfg['history']:
+            self.entrycombo.append_text(i)
 
     def _cursor_moved_cb(self, entry, *ignored):
         # make the eyes track the motion of the text cursor
@@ -446,15 +389,16 @@ class SpeakActivity(CollabActivity):
     def _activeCb( self, widget, pspec ):
         # only generate sound when this activity is active
         if not self.props.active:
-            self.face.quiet()
-        else:
-            self.face.verbose()
+            self.face.shut_up()
+            self.chat.shut_up()
 
     def _toolbar_changed_cb(self, widget, index):
         if index == CHAT_TOOLBAR:
+            self.face.shut_up()
             self.chat.me.update(self.face.status)
             self.notebook.set_current_page(1)
         else:
+            self.chat.shut_up()
             self.notebook.set_current_page(0)
 
     def on_tube(self, tube_conn, initiating):
diff --git a/Speak.activity/audio.py b/Speak.activity/audio.py
index 1176fb5..646dac9 100644
--- a/Speak.activity/audio.py
+++ b/Speak.activity/audio.py
@@ -23,17 +23,17 @@
 # This code is a stripped down version of the audio grabber from Measure
 
-import pygst
-pygst.require("0.10")
 import gst
 import pygtk
 import gtk, gobject
 import signal, os
 import time
 import dbus
-import audioop
+import logging
 from struct import *
 
+logger = logging.getLogger('speak')
+
 class AudioGrab(gobject.GObject):
     __gsignals__ = {
         'new-buffer': (gobject.SIGNAL_RUN_FIRST, None, [gobject.TYPE_PYOBJECT])
     }
@@ -43,13 +43,29 @@
         gobject.GObject.__init__(self)
         self.pipeline = None
 
-    def playfile(self, filename):
+    def playfile(self, status, text):
+        pitch = int(status.pitch)
+        rate = int(status.rate)
+        # espeak uses 80 to 370
+        rate = 80 + (370-80) * rate / 100
+
+        logger.debug('pitch=%d rate=%d voice=%s text=%s' % (pitch, rate,
+                status.voice.name, text))
+
         self.stop_sound_device()
+        self._quiet = False
 
         # build a pipeline that reads the given file
         # and sends it to both the real audio output
         # and a fake one that we use to draw from
-        p = 'filesrc name=file-source ! decodebin ! tee name=tee tee.! audioconvert ! alsasink tee.! queue ! audioconvert name=conv'
+        p = 'espeak text="%s" pitch=%d rate=%d voice=%s ' \
+                '! decodebin ' \
+                '! tee name=tee ' \
+                'tee.! audioconvert ' \
+                '! alsasink ' \
+                'tee.! queue ' \
+                '! audioconvert name=conv' \
+                % (text, pitch, rate, status.voice.name)
         self.pipeline = gst.parse_launch(p)
 
         # make a fakesink to capture audio
@@ -58,13 +74,14 @@
         fakesink.set_property("signal-handoffs",True)
         self.pipeline.add(fakesink)
 
+        bus = self.pipeline.get_bus()
+        bus.add_signal_watch()
+        bus.connect('message', self._gstmessage_cb)
+
         # attach it to the pipeline
         conv = self.pipeline.get_by_name("conv")
         gst.element_link_many(conv, fakesink)
 
-        # set the source file
-        self.pipeline.get_by_name("file-source").set_property('location', filename)
-
         # play
         self.restart_sound_device()
 
@@ -72,42 +89,22 @@
     # we should stop the sound device and stop emitting buffers
     # to save on CPU and battery usage when there is no audio playing
 
-    def playfd(self, fd):
-        self.stop_sound_device()
-
-        # build a pipeline that reads the given file
-        # and sends it to both the real audio output
-        # and a fake one that we use to draw from
-        if self.pipeline is None:
-            p = 'fdsrc name=fd-source ! wavparse ! tee name=tee tee.! audioconvert ! alsasink tee.! queue ! audioconvert name=conv'
-            self.pipeline = gst.parse_launch(p)
-
-            # make a fakesink to capture audio
-            fakesink = gst.element_factory_make("fakesink", "fakesink")
-            fakesink.connect("handoff",self.on_buffer)
-            fakesink.set_property("signal-handoffs",True)
-            self.pipeline.add(fakesink)
+    def _gstmessage_cb(self, bus, message):
+        type = message.type
 
-            # attach it to the pipeline
-            conv = self.pipeline.get_by_name("conv")
-            gst.element_link_many(conv, fakesink)
+        if type == gst.MESSAGE_EOS:
+            # END OF SOUND FILE
+            self.stop_sound_device()
+        elif type == gst.MESSAGE_ERROR:
+            self.stop_sound_device()
 
-        # set the source file
-        self.pipeline.get_by_name("fd-source").set_property('fd', fd)
-
-        # play
-        self.restart_sound_device()
-
-    # how do we detect when the sample has finished playing?
-    # we should stop the sound device and stop emitting buffers
-    # to save on CPU and battery usage when there is no audio playing
-
     def on_quit(self):
         self.pipeline.set_state(gst.STATE_NULL)
 
     def _new_buffer(self, buf):
-        # pass captured audio to anyone who is interested via the main thread
-        self.emit("new-buffer", buf)
+        if not self._quiet:
+            # pass captured audio to anyone who is interested via the main thread
+            self.emit("new-buffer", buf)
         return False
 
     def on_buffer(self,element,buffer,pad):
@@ -116,9 +113,15 @@
         return True
 
     def stop_sound_device(self):
-        if self.pipeline is not None:
-            self.pipeline.set_state(gst.STATE_NULL)
+        if self.pipeline is None:
+            return
+
+        self.pipeline.set_state(gst.STATE_NULL)
+        # Shut their mouths down
+        self._new_buffer('')
+        self._quiet = True
 
     def restart_sound_device(self):
+        self.pipeline.set_state(gst.STATE_NULL)
         self.pipeline.set_state(gst.STATE_PLAYING)
diff --git a/Speak.activity/chat.py b/Speak.activity/chat.py
index fa78c91..fd2f805 100644
--- a/Speak.activity/chat.py
+++ b/Speak.activity/chat.py
@@ -144,6 +144,11 @@
         if len(self._buddies) == 0:
             self._desk.remove(self._buddies_box)
 
+    def shut_up(self):
+        for i in self._buddies:
+            i['face'].shut_up()
+        self.me.shut_up()
+
     def _add_buddy(self, buddy):
         box = hippo.CanvasBox(
                 orientation = hippo.ORIENTATION_HORIZONTAL,
diff --git a/Speak.activity/face.py b/Speak.activity/face.py
index e77fe8f..61edaa7 100644
--- a/Speak.activity/face.py
+++ b/Speak.activity/face.py
@@ -22,36 +22,13 @@
 # along with Speak.activity.  If not, see <http://www.gnu.org/licenses/>.
 
-import sys
-import os
-from urllib import (quote, unquote)
-import subprocess
-import random
-from sugar.activity import activity
-from sugar.datastore import datastore
-from sugar.presence import presenceservice
 import logging
 import gtk
-import gobject
-import pango
 import cjson
 from gettext import gettext as _
 
-# try:
-#     sys.path.append('/usr/lib/python2.4/site-packages') # for speechd
-#     import speechd.client
-# except:
-#     print "Speech-dispatcher not found."
-
-from sugar.graphics.toolbutton import ToolButton
-from sugar.graphics.toolcombobox import ToolComboBox
-from sugar.graphics.combobox import ComboBox
 import sugar.graphics.style as style
 
-import pygst
-pygst.require("0.10")
-import gst
-
 import audio
 import eye
 import glasses
@@ -62,8 +39,8 @@
 import waveform_mouth
 
 logger = logging.getLogger('speak')
 
-PITCH_MAX = 100
-RATE_MAX = 100
+PITCH_MAX = 99
+RATE_MAX = 99
 FACE_PAD = 2
 
 class Status:
@@ -83,7 +60,6 @@
         return cjson.encode({
             'voice' : { 'language' : self.voice.language,
-                        'gender' : self.voice.gender,
                         'name' : self.voice.name },
             'pitch' : self.pitch,
             'rate' : self.rate,
@@ -98,14 +74,15 @@
                 3: waveform_mouth.WaveformMouth }
 
         data = cjson.decode(buf)
-        self.voice.language = data['voice']['language']
-        self.voice.gender = data['voice']['gender']
-        self.voice.name = data['voice']['name']
+        self.voice = voice.Voice(data['voice']['language'],
+                data['voice']['name'])
         self.pitch = data['pitch']
         self.rate = data['rate']
         self.eyes = [eyes[i] for i in data['eyes']]
         self.mouth = mouths[data['mouth']]
 
+        return self
+
 class View(gtk.EventBox):
     def __init__(self, fill_color=style.COLOR_BUTTON_GREY):
         gtk.EventBox.__init__(self)
@@ -116,18 +93,6 @@
         self.connect('size-allocate', self._size_allocate_cb)
 
         self._audio = audio.AudioGrab()
-        self._synth = None
-        # try:
-        #     self._synth = speechd.client.SSIPClient("Speak.activity")
-        #     try:
-        #         # Try some speechd v0.6.6 features
-        #         print "Output modules:", self._synth.list_output_modules()
-        #         print "Voices:", self._synth.list_synthesis_voices()
-        #     except:
-        #         pass
-        # except:
-        #     self._synth = None
-        #     print "Falling back to espeak command line tool."
 
         # make an empty box for some eyes
         self._eyes = None
@@ -186,39 +151,10 @@
         #self._mouth.add_events(gtk.gdk.POINTER_MOTION_MASK)
 
     def say(self, something):
-        if self._audio is None:
-            return
-
-        logger.debug('%s: %s' % (self.status.voice.name, something))
-        pitch = int(self.status.pitch)
-        rate = int(self.status.rate)
-
-        if self._synth is not None:
-            # speechd uses -100 to 100
-            pitch = pitch*2 - 100
-            # speechd uses -100 to 100
-            rate = rate*2 - 100
-
-            self._synth.set_rate(rate)
-            self._synth.set_pitch(pitch)
-            self._synth.set_language(self.status.voice.language)
-            self._synth.speak(something) #, callback=self._synth_cb)
-        else:
-            # espeak uses 0 to 99
-            pitch = pitch
-            # espeak uses 80 to 370
-            rate = 80 + (370-80) * rate / 100
-
-            # ideally we would stream the audio instead of writing to disk each time...
-            wavpath = "/tmp/speak.wav"
-            subprocess.call(["espeak", "-w", wavpath, "-p", str(pitch), "-s", str(rate), "-v", self.status.voice.name, something], stdout=subprocess.PIPE)
-            self._audio.playfile(wavpath)
+        self._audio.playfile(self.status, something)
 
-    def quiet(self):
+    def shut_up(self):
         self._audio.stop_sound_device()
 
-    def verbose(self):
-        self._audio.restart_sound_device()
-
     def _size_allocate_cb(self, widget, allocation):
         self._mouthbox.set_size_request(-1, int(allocation.height/2.5))
diff --git a/Speak.activity/mouth.py b/Speak.activity/mouth.py
index 404eb3d..8b72f4b 100644
--- a/Speak.activity/mouth.py
+++ b/Speak.activity/mouth.py
@@ -50,7 +50,7 @@
         return True
 
     def processBuffer(self, bounds):
-        if len(self.main_buffers) == 0:
+        if len(self.main_buffers) == 0 or len(self.newest_buffer) == 0:
             self.volume = 0
         else:
             self.volume = numpy.core.max(self.main_buffers)# - numpy.core.min(self.main_buffers)
diff --git a/Speak.activity/voice.py b/Speak.activity/voice.py
index fda72dc..aaf5ea2 100644
--- a/Speak.activity/voice.py
+++ b/Speak.activity/voice.py
@@ -22,8 +22,8 @@
 # along with Speak.activity.  If not, see <http://www.gnu.org/licenses/>.
 
-import subprocess
 import re, os
+import gst
 from gettext import gettext as _
 
 # Lets trick gettext into generating entries for the voice names we expect espeak to have
@@ -68,9 +68,8 @@ expectedVoiceNames = [
 _allVoices = {}
 
 class Voice:
-    def __init__(self, language, gender, name):
+    def __init__(self, language, name):
         self.language = language
-        self.gender = gender
         self.name = name
 
         friendlyname = name
@@ -83,16 +82,14 @@
 def allVoices():
     if len(_allVoices) == 0:
-        result = subprocess.Popen(["espeak", "--voices"], stdout=subprocess.PIPE).communicate()[0]
-        for line in result.split('\n'):
-            m = re.match(r'\s*\d+\s+([\w-]+)\s+([MF])\s+([\w_-]+)\s+(.+)', line)
-            if m:
-                language, gender, name, stuff = m.groups()
-                if stuff.startswith('mb/') or name in ('en-rhotic','english_rp','english_wmids'):
-                    # these voices don't produce sound
-                    continue
-                voice = Voice(language, gender, name)
-                _allVoices[voice.friendlyname] = voice
+        for i in gst.element_factory_make('espeak').props.voices:
+            name, language = i.split(':')
+            if name in ('en-rhotic','english_rp','english_wmids'):
+                # these voices don't produce sound
+                continue
+            voice = Voice(language, name)
+            _allVoices[voice.friendlyname] = voice
+
    return _allVoices
 
 def defaultVoice():
--
cgit v0.9.1
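
For reference, a minimal standalone sketch of the pipeline shape that audio.py builds in playfile() after this patch. It assumes gst-plugins-espeak is installed so that an 'espeak' element can be created; the element name, its text/pitch/rate/voice properties, the 0-99 to 80-370 rate mapping, and the stop-on-EOS/error rule are taken from the patch itself, while the 'default' voice name and the simplified sink (no tee/fakesink tap feeding the mouth animation) are illustrative assumptions, not part of the commit.

#!/usr/bin/env python
# Sketch only: exercises the same 'espeak' GStreamer source element
# this patch switches to.  Pipeline string mirrors playfile() above;
# the voice name 'default' is an assumption.

import gobject
import pygst
pygst.require('0.10')
import gst

text = 'Hello. Type something.'
pitch = 50                          # 0..99, same scale as face.py's PITCH_MAX
rate = 80 + (370 - 80) * 50 / 100   # playfile() maps 0..99 into espeak's 80..370

# Same source chain as playfile(), minus the tee/fakesink tap.
pipeline = gst.parse_launch(
        'espeak text="%s" pitch=%d rate=%d voice=%s '
        '! decodebin ! audioconvert ! alsasink'
        % (text, pitch, rate, 'default'))

loop = gobject.MainLoop()

def message_cb(bus, message):
    # Same rule as _gstmessage_cb(): tear the pipeline down on EOS or error.
    if message.type in (gst.MESSAGE_EOS, gst.MESSAGE_ERROR):
        pipeline.set_state(gst.STATE_NULL)
        loop.quit()

bus = pipeline.get_bus()
bus.add_signal_watch()
bus.connect('message', message_cb)

pipeline.set_state(gst.STATE_PLAYING)
loop.run()

The voice list that voice.py now reads can be inspected the same way, via gst.element_factory_make('espeak').props.voices.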