From d83c269464cf301fc71067ecf7702eefd6f5816d Mon Sep 17 00:00:00 2001
From: Aleksey Lim
Date: Thu, 05 Feb 2009 23:35:29 +0000
Subject: Switch Espeak to gst-plugins-espeak
---
diff --git a/Speak.activity/activity.py b/Speak.activity/activity.py
index ca933e4..06f3535 100755
--- a/Speak.activity/activity.py
+++ b/Speak.activity/activity.py
@@ -22,34 +22,19 @@
 # along with Speak.activity.  If not, see <http://www.gnu.org/licenses/>.
 
-import sys
-import os
-from urllib import (quote, unquote)
-import subprocess
-import random
 from sugar.activity import activity
-from sugar.datastore import datastore
 from sugar.presence import presenceservice
 import logging
 import gtk
 import gobject
 import pango
+import cjson
 from gettext import gettext as _
 
-# try:
-#     sys.path.append('/usr/lib/python2.4/site-packages') # for speechd
-#     import speechd.client
-# except:
-#     print "Speech-dispatcher not found."
-
 from sugar.graphics.toolbutton import ToolButton
 from sugar.graphics.toolcombobox import ToolComboBox
 from sugar.graphics.combobox import ComboBox
 
-import pygst
-pygst.require("0.10")
-import gst
-
 import eye
 import glasses
 import mouth
@@ -62,6 +47,8 @@
 from chat import Chat
 from collab import CollabActivity
 from messenger import Messenger, SERVICE
 
+logger = logging.getLogger('speak')
+
 CHAT_TOOLBAR = 3
 
 class SpeakActivity(CollabActivity):
@@ -163,11 +150,6 @@
         #     return True
         # gobject.timeout_add(100, poll_mouse)
 
-        # say hello to the user
-        presenceService = presenceservice.get_instance()
-        xoOwner = presenceService.get_owner()
-        self.face.say(_("Hello %s. Type something.") % xoOwner.props.nick)
-
         # XXX do it after(possible) read_file() invoking
         # have to rely on calling read_file() from map_cb in sugar-toolkit
         self.connect_after('map', self.connect_to)
@@ -181,80 +163,41 @@
         self.numeyesadj.connect("value_changed", self.eyes_changed_cb, False)
         self.eye_shape_combo.connect('changed', self.eyes_changed_cb, False)
         self.eyes_changed_cb(None, True)
+        self.face.look_ahead()
+
+        # say hello to the user
+        presenceService = presenceservice.get_instance()
+        xoOwner = presenceService.get_owner()
+        self.face.say(_("Hello %s. Type something.") % xoOwner.props.nick)
+
     def write_file(self, file_path):
-        f = open(file_path, "w")
-        f.write("speak file format v1\n")
-        f.write("voice=%s\n" % quote(self.face.status.voice.friendlyname))
-        f.write("text=%s\n" % quote(self.entry.props.text))
-        history = map(lambda i: i[0], self.entrycombo.get_model())
-        f.write("history=[%s]\n" % ",".join(map(quote, history)))
-        f.write("pitch=%d\n" % self.pitchadj.value)
-        f.write("rate=%d\n" % self.rateadj.value)
-        f.write("mouth_shape=%s\n" % quote(self.mouth_shape_combo.get_active_item()[1]))
-        f.write("eye_shape=%s\n" % quote(self.eye_shape_combo.get_active_item()[1]))
-        f.write("num_eyes=%d\n" % self.numeyesadj.value)
-        f.close()
-
-        f = open(file_path, "r")
-        print f.readlines()
-        f.close()
-
+        cfg = { 'status' : self.face.status.serialize(),
+                'text' : self.entry.props.text,
+                'history' : map(lambda i: i[0], self.entrycombo.get_model()) }
+        file(file_path, 'w').write(cjson.encode(cfg))
 
     def read_file(self, file_path):
-
-        def pick_combo_item(combo, name):
-            index = 0
-            model = combo.get_model()
-            for item in model:
-                if item[1] == name:
-                    combo.set_active(index)
-                    return True
-                index += 1
-            return False
-
-        f = open(file_path, "r")
-        header = f.readline().strip()
-        if header != "speak file format v1":
-            print "Reading format from the future '%s', will try my best." % header
-        for line in f.readlines():
-            line = line.strip()
-            index = line.find('=')
-            key = line[:index]
-            value = line[index+1:]
-            if key == 'voice':
-                voice_name = unquote(value)
-                found = pick_combo_item(self.voice_combo, voice_name)
-                if not found:
-                    print "Unrecognized voice name: %s" % voice_name
-            elif key == 'text':
-                self.entry.props.text = unquote(value)
-            elif key == 'history':
-                if value[0]=='[' and value[-1]==']':
-                    for item in value[1:-1].split(','):
-                        self.entrycombo.append_text(unquote(item))
-                else:
-                    print "Unrecognized history: %s" % value
-            elif key == 'pitch':
-                self.pitchadj.value = int(value)
-            elif key == 'rate':
-                self.rateadj.value = int(value)
-            elif key == 'mouth_shape':
-                mouth_name = unquote(value)
-                found = pick_combo_item(self.mouth_shape_combo, mouth_name)
-                if not found:
-                    print "Unrecognized mouth shape: %s" % mouth_name
-            elif key == 'eye_shape':
-                eye_name = unquote(value)
-                found = pick_combo_item(self.eye_shape_combo, eye_name)
-                if not found:
-                    print "Unrecognized eye shape: %s" % eye_name
-            elif key == 'num_eyes':
-                self.numeyesadj.value = int(value)
-            else:
-                print "Ignoring unrecognized line: %s" % line
-        f.close()
+        cfg = cjson.decode(file(file_path, 'r').read())
+
+        def pick_combo_item(combo, col, obj):
+            for i, item in enumerate(combo.get_model()):
+                if item[col] == obj:
+                    combo.set_active(i)
+                    return
+            logger.warning("Unrecognized loaded value: %s" % obj)
+
+        status = self.face.status = face.Status().deserialize(cfg['status'])
+        pick_combo_item(self.voice_combo, 1, status.voice.friendlyname)
+        self.pitchadj.value = self.face.status.pitch
+        self.rateadj.value = self.face.status.rate
+        pick_combo_item(self.mouth_shape_combo, 0, status.mouth)
+        pick_combo_item(self.eye_shape_combo, 0, status.eyes[0])
+        self.numeyesadj.value = len(status.eyes)
+
+        self.entry.props.text = cfg['text']
+        for i in cfg['history']:
+            self.entrycombo.append_text(i)
 
     def _cursor_moved_cb(self, entry, *ignored):
         # make the eyes track the motion of the text cursor
@@ -446,15 +389,16 @@ class SpeakActivity(CollabActivity):
     def _activeCb( self, widget, pspec ):
         # only generate sound when this activity is active
         if not self.props.active:
-            self.face.quiet()
-        else:
-            self.face.verbose()
+            self.face.shut_up()
+            self.chat.shut_up()
 
     def _toolbar_changed_cb(self, widget, index):
         if index == CHAT_TOOLBAR:
+            self.face.shut_up()
             self.chat.me.update(self.face.status)
             self.notebook.set_current_page(1)
         else:
+            self.chat.shut_up()
             self.notebook.set_current_page(0)
 
     def on_tube(self, tube_conn, initiating):
diff --git a/Speak.activity/audio.py b/Speak.activity/audio.py
index 1176fb5..646dac9 100644
--- a/Speak.activity/audio.py
+++ b/Speak.activity/audio.py
@@ -23,17 +23,17 @@
 # This code is a stripped down version of the audio grabber from Measure
 
-import pygst
-pygst.require("0.10")
 import gst
 import pygtk
 import gtk, gobject
 import signal, os
 import time
 import dbus
-import audioop
+import logging
 from struct import *
 
+logger = logging.getLogger('speak')
+
 class AudioGrab(gobject.GObject):
     __gsignals__ = {
         'new-buffer': (gobject.SIGNAL_RUN_FIRST, None, [gobject.TYPE_PYOBJECT])
     }
@@ -43,13 +43,29 @@
         gobject.GObject.__init__(self)
         self.pipeline = None
 
-    def playfile(self, filename):
+    def playfile(self, status, text):
+        pitch = int(status.pitch)
+        rate = int(status.rate)
+        # espeak uses 80 to 370
+        rate = 80 + (370-80) * rate / 100
+
+        logger.debug('pitch=%d rate=%d voice=%s text=%s' % (pitch, rate,
+                status.voice.name, text))
+
         self.stop_sound_device()
+        self._quiet = False
 
         # build a pipeline that reads the given file
         # and sends it to both the real audio output
         # and a fake one that we use to draw from
-        p = 'filesrc name=file-source ! decodebin ! tee name=tee tee.! audioconvert ! alsasink tee.! queue ! audioconvert name=conv'
+        p = 'espeak text="%s" pitch=%d rate=%d voice=%s ' \
+                '! decodebin ' \
+                '! tee name=tee ' \
+                'tee.! audioconvert ' \
+                '! alsasink ' \
+                'tee.! queue ' \
+                '! audioconvert name=conv' \
+                % (text, pitch, rate, status.voice.name)
         self.pipeline = gst.parse_launch(p)
 
         # make a fakesink to capture audio
@@ -58,13 +74,14 @@
         fakesink.set_property("signal-handoffs",True)
         self.pipeline.add(fakesink)
 
+        bus = self.pipeline.get_bus()
+        bus.add_signal_watch()
+        bus.connect('message', self._gstmessage_cb)
+
         # attach it to the pipeline
         conv = self.pipeline.get_by_name("conv")
         gst.element_link_many(conv, fakesink)
 
-        # set the source file
-        self.pipeline.get_by_name("file-source").set_property('location', filename)
-
         # play
         self.restart_sound_device()
 
@@ -72,42 +89,22 @@
     # we should stop the sound device and stop emitting buffers
     # to save on CPU and battery usage when there is no audio playing
 
-    def playfd(self, fd):
-        self.stop_sound_device()
-
-        # build a pipeline that reads the given file
-        # and sends it to both the real audio output
-        # and a fake one that we use to draw from
-        if self.pipeline is None:
-            p = 'fdsrc name=fd-source ! wavparse ! tee name=tee tee.! audioconvert ! alsasink tee.! queue ! audioconvert name=conv'
-            self.pipeline = gst.parse_launch(p)
-
-            # make a fakesink to capture audio
-            fakesink = gst.element_factory_make("fakesink", "fakesink")
-            fakesink.connect("handoff",self.on_buffer)
-            fakesink.set_property("signal-handoffs",True)
-            self.pipeline.add(fakesink)
+    def _gstmessage_cb(self, bus, message):
+        type = message.type
 
-            # attach it to the pipeline
-            conv = self.pipeline.get_by_name("conv")
-            gst.element_link_many(conv, fakesink)
+        if type == gst.MESSAGE_EOS:
+            # END OF SOUND FILE
+            self.stop_sound_device()
+        elif type == gst.MESSAGE_ERROR:
+            self.stop_sound_device()
 
-        # set the source file
-        self.pipeline.get_by_name("fd-source").set_property('fd', fd)
-
-        # play
-        self.restart_sound_device()
-
-    # how do we detect when the sample has finished playing?
-    # we should stop the sound device and stop emitting buffers
-    # to save on CPU and battery usage when there is no audio playing
-
     def on_quit(self):
         self.pipeline.set_state(gst.STATE_NULL)
 
     def _new_buffer(self, buf):
-        # pass captured audio to anyone who is interested via the main thread
-        self.emit("new-buffer", buf)
+        if not self._quiet:
+            # pass captured audio to anyone who is interested via the main thread
+            self.emit("new-buffer", buf)
         return False
 
     def on_buffer(self,element,buffer,pad):
@@ -116,9 +113,15 @@
         return True
 
     def stop_sound_device(self):
-        if self.pipeline is not None:
-            self.pipeline.set_state(gst.STATE_NULL)
+        if self.pipeline is None:
+            return
+
+        self.pipeline.set_state(gst.STATE_NULL)
+        # Shut their mouths down
+        self._new_buffer('')
+        self._quiet = True
 
     def restart_sound_device(self):
+        self.pipeline.set_state(gst.STATE_NULL)
         self.pipeline.set_state(gst.STATE_PLAYING)
diff --git a/Speak.activity/chat.py b/Speak.activity/chat.py
index fa78c91..fd2f805 100644
--- a/Speak.activity/chat.py
+++ b/Speak.activity/chat.py
@@ -144,6 +144,11 @@
         if len(self._buddies) == 0:
             self._desk.remove(self._buddies_box)
 
+    def shut_up(self):
+        for i in self._buddies:
+            i['face'].shut_up()
+        self.me.shut_up()
+
     def _add_buddy(self, buddy):
         box = hippo.CanvasBox(
                 orientation = hippo.ORIENTATION_HORIZONTAL,
diff --git a/Speak.activity/face.py b/Speak.activity/face.py
index e77fe8f..61edaa7 100644
--- a/Speak.activity/face.py
+++ b/Speak.activity/face.py
@@ -22,36 +22,13 @@
 # along with Speak.activity.  If not, see <http://www.gnu.org/licenses/>.
 
-import sys
-import os
-from urllib import (quote, unquote)
-import subprocess
-import random
-from sugar.activity import activity
-from sugar.datastore import datastore
-from sugar.presence import presenceservice
 import logging
 import gtk
-import gobject
-import pango
 import cjson
 from gettext import gettext as _
 
-# try:
-#     sys.path.append('/usr/lib/python2.4/site-packages') # for speechd
-#     import speechd.client
-# except:
-#     print "Speech-dispatcher not found."
-
-from sugar.graphics.toolbutton import ToolButton
-from sugar.graphics.toolcombobox import ToolComboBox
-from sugar.graphics.combobox import ComboBox
 import sugar.graphics.style as style
 
-import pygst
-pygst.require("0.10")
-import gst
-
 import audio
 import eye
 import glasses
@@ -62,8 +39,8 @@
 import waveform_mouth
 
 logger = logging.getLogger('speak')
 
-PITCH_MAX = 100
-RATE_MAX = 100
+PITCH_MAX = 99
+RATE_MAX = 99
 FACE_PAD = 2
 
 class Status:
@@ -83,7 +60,6 @@
         return cjson.encode({
             'voice' : { 'language' : self.voice.language,
-                        'gender' : self.voice.gender,
                         'name' : self.voice.name },
             'pitch' : self.pitch,
             'rate' : self.rate,
@@ -98,14 +74,15 @@
                 3: waveform_mouth.WaveformMouth }
 
         data = cjson.decode(buf)
-        self.voice.language = data['voice']['language']
-        self.voice.gender = data['voice']['gender']
-        self.voice.name = data['voice']['name']
+        self.voice = voice.Voice(data['voice']['language'],
+                data['voice']['name'])
         self.pitch = data['pitch']
         self.rate = data['rate']
         self.eyes = [eyes[i] for i in data['eyes']]
         self.mouth = mouths[data['mouth']]
 
+        return self
+
 class View(gtk.EventBox):
     def __init__(self, fill_color=style.COLOR_BUTTON_GREY):
         gtk.EventBox.__init__(self)
@@ -116,18 +93,6 @@
         self.connect('size-allocate', self._size_allocate_cb)
 
         self._audio = audio.AudioGrab()
-        self._synth = None
-        # try:
-        #     self._synth = speechd.client.SSIPClient("Speak.activity")
-        #     try:
-        #         # Try some speechd v0.6.6 features
-        #         print "Output modules:", self._synth.list_output_modules()
-        #         print "Voices:", self._synth.list_synthesis_voices()
-        #     except:
-        #         pass
-        # except:
-        #     self._synth = None
-        #     print "Falling back to espeak command line tool."
 
         # make an empty box for some eyes
         self._eyes = None
@@ -186,39 +151,10 @@
         #self._mouth.add_events(gtk.gdk.POINTER_MOTION_MASK)
 
     def say(self, something):
-        if self._audio is None:
-            return
-
-        logger.debug('%s: %s' % (self.status.voice.name, something))
-        pitch = int(self.status.pitch)
-        rate = int(self.status.rate)
-
-        if self._synth is not None:
-            # speechd uses -100 to 100
-            pitch = pitch*2 - 100
-            # speechd uses -100 to 100
-            rate = rate*2 - 100
-
-            self._synth.set_rate(rate)
-            self._synth.set_pitch(pitch)
-            self._synth.set_language(self.status.voice.language)
-            self._synth.speak(something) #, callback=self._synth_cb)
-        else:
-            # espeak uses 0 to 99
-            pitch = pitch
-            # espeak uses 80 to 370
-            rate = 80 + (370-80) * rate / 100
-
-            # ideally we would stream the audio instead of writing to disk each time...
-            wavpath = "/tmp/speak.wav"
-            subprocess.call(["espeak", "-w", wavpath, "-p", str(pitch), "-s", str(rate), "-v", self.status.voice.name, something], stdout=subprocess.PIPE)
-            self._audio.playfile(wavpath)
+        self._audio.playfile(self.status, something)
 
-    def quiet(self):
+    def shut_up(self):
         self._audio.stop_sound_device()
 
-    def verbose(self):
-        self._audio.restart_sound_device()
-
     def _size_allocate_cb(self, widget, allocation):
         self._mouthbox.set_size_request(-1, int(allocation.height/2.5))
diff --git a/Speak.activity/mouth.py b/Speak.activity/mouth.py
index 404eb3d..8b72f4b 100644
--- a/Speak.activity/mouth.py
+++ b/Speak.activity/mouth.py
@@ -50,7 +50,7 @@
         return True
 
     def processBuffer(self, bounds):
-        if len(self.main_buffers) == 0:
+        if len(self.main_buffers) == 0 or len(self.newest_buffer) == 0:
             self.volume = 0
         else:
             self.volume = numpy.core.max(self.main_buffers)# - numpy.core.min(self.main_buffers)
diff --git a/Speak.activity/voice.py b/Speak.activity/voice.py
index fda72dc..aaf5ea2 100644
--- a/Speak.activity/voice.py
+++ b/Speak.activity/voice.py
@@ -22,8 +22,8 @@
 # along with Speak.activity.  If not, see <http://www.gnu.org/licenses/>.
 
-import subprocess
 import re, os
+import gst
 from gettext import gettext as _
 
 # Lets trick gettext into generating entries for the voice names we expect espeak to have
@@ -68,9 +68,8 @@ expectedVoiceNames = [
 _allVoices = {}
 
 class Voice:
-    def __init__(self, language, gender, name):
+    def __init__(self, language, name):
         self.language = language
-        self.gender = gender
         self.name = name
 
         friendlyname = name
@@ -83,16 +82,14 @@
 def allVoices():
     if len(_allVoices) == 0:
-        result = subprocess.Popen(["espeak", "--voices"], stdout=subprocess.PIPE).communicate()[0]
-        for line in result.split('\n'):
-            m = re.match(r'\s*\d+\s+([\w-]+)\s+([MF])\s+([\w_-]+)\s+(.+)', line)
-            if m:
-                language, gender, name, stuff = m.groups()
-                if stuff.startswith('mb/') or name in ('en-rhotic','english_rp','english_wmids'):
-                    # these voices don't produce sound
-                    continue
-                voice = Voice(language, gender, name)
-                _allVoices[voice.friendlyname] = voice
+        for i in gst.element_factory_make('espeak').props.voices:
+            name, language = i.split(':')
+            if name in ('en-rhotic','english_rp','english_wmids'):
+                # these voices don't produce sound
+                continue
+            voice = Voice(language, name)
+            _allVoices[voice.friendlyname] = voice
+
    return _allVoices
 
 def defaultVoice():
--
cgit v0.9.1
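
For reference, a minimal standalone sketch of the pipeline shape that audio.py builds in playfile() after this patch. It assumes gst-plugins-espeak is installed so that an 'espeak' element can be created; the element name, its text/pitch/rate/voice properties, the 0-99 to 80-370 rate mapping, and the stop-on-EOS/error rule are taken from the patch itself, while the 'default' voice name and the simplified sink (no tee/fakesink tap feeding the mouth animation) are illustrative assumptions, not part of the commit.

#!/usr/bin/env python
# Sketch only: exercises the same 'espeak' GStreamer source element
# this patch switches to.  Pipeline string mirrors playfile() above;
# the voice name 'default' is an assumption.

import gobject
import pygst
pygst.require('0.10')
import gst

text = 'Hello. Type something.'
pitch = 50                          # 0..99, same scale as face.py's PITCH_MAX
rate = 80 + (370 - 80) * 50 / 100   # playfile() maps 0..99 into espeak's 80..370

# Same source chain as playfile(), minus the tee/fakesink tap.
pipeline = gst.parse_launch(
        'espeak text="%s" pitch=%d rate=%d voice=%s '
        '! decodebin ! audioconvert ! alsasink'
        % (text, pitch, rate, 'default'))

loop = gobject.MainLoop()

def message_cb(bus, message):
    # Same rule as _gstmessage_cb(): tear the pipeline down on EOS or error.
    if message.type in (gst.MESSAGE_EOS, gst.MESSAGE_ERROR):
        pipeline.set_state(gst.STATE_NULL)
        loop.quit()

bus = pipeline.get_bus()
bus.add_signal_watch()
bus.connect('message', message_cb)

pipeline.set_state(gst.STATE_PLAYING)
loop.run()

The voice list that voice.py now reads can be inspected the same way, via gst.element_factory_make('espeak').props.voices.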