Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary · refs · log · tree · commit · diff · stats
diff options
context:
space:
mode:
author Aleksey Lim <alsroot@member.fsf.org> 2009-03-08 20:35:49 (GMT)
committer Aleksey Lim <alsroot@member.fsf.org> 2009-03-08 20:35:49 (GMT)
commit 3f01911940efbc8af2bacf2d54b0d06a2802f1c1 (patch)
tree fa82fa83299623e4791329f071b5b2e7a37d25f9
parent 81613252fbcb2eec85a95eb1fefbd861c88732f5 (diff)
Add mark events
-rw-r--r-- src/espeak.c 138
-rw-r--r-- src/espeak.h 5
-rw-r--r-- src/gstespeak.c 14
-rw-r--r-- src/gstespeak.h 1
4 files changed, 139 insertions, 19 deletions
diff --git a/src/espeak.c b/src/espeak.c
index 92395f5..5ea1a4b 100644
--- a/src/espeak.c
+++ b/src/espeak.c
@@ -56,6 +56,8 @@ typedef struct
GArray *events;
goffset events_pos;
+ goffset mark_offset;
+ const gchar *mark_name;
} Espin;
struct _Econtext
@@ -75,6 +77,7 @@ struct _Econtext
volatile gint pitch;
volatile const gchar *voice;
volatile gint gap;
+ volatile gint track;
GstElement *emitter;
GstBus *bus;
@@ -90,7 +93,7 @@ spinning(Espin *base, Espin **i)
static void
emit_word(Econtext *self, guint offset, guint len)
{
- GstStructure *data = gst_structure_new("word",
+ GstStructure *data = gst_structure_new("espeak-word",
"offset", G_TYPE_UINT, offset,
"len", G_TYPE_UINT, len,
NULL);
@@ -100,6 +103,19 @@ emit_word(Econtext *self, guint offset, guint len)
gst_bus_post(self->bus, msg);
}
+static void
+emit_mark(Econtext *self, guint offset, const gchar *mark)
+{
+ GstStructure *data = gst_structure_new("espeak-mark",
+ "offset", G_TYPE_UINT, offset,
+ "mark", G_TYPE_STRING, mark,
+ NULL);
+ if (!self->bus)
+ self->bus = gst_element_get_bus(self->emitter);
+ GstMessage *msg = gst_message_new_element(GST_OBJECT(self->emitter), data);
+ gst_bus_post(self->bus, msg);
+}
+
static void init();
static void process_push(Econtext*);
static void process_pop(Econtext*);
@@ -145,6 +161,7 @@ espeak_new(GstElement *emitter)
self->rate = ESPEAK_DEFAULT_RATE;
self->voice = ESPEAK_DEFAULT_VOICE;
self->gap = ESPEAK_DEFAULT_GAP;
+ self->track = ESPEAK_TRACK_NONE;
self->emitter = emitter;
gst_object_ref(self->emitter);
@@ -246,14 +263,11 @@ play(Econtext *self, Espin *spin, gsize size_to_play)
return MIN(size_to_play, spin_size);
}
- inline gsize words(Econtext *self, Espin *spin, gsize size_to_play)
+ inline gsize word(Econtext *self, Espin *spin, gsize size_to_play)
{
gsize spin_size = g_memory_output_stream_get_data_size(spin->sound);
size_to_play = MIN(size_to_play, spin_size);
- GST_DEBUG("spin_size=%ld size_to_play=%ld spin->events_pos=%ld",
- spin_size, size_to_play, spin->events_pos);
-
goffset event;
goffset sample_offset = 0;
goffset text_offset = -1;
@@ -263,6 +277,10 @@ play(Econtext *self, Espin *spin, gsize size_to_play)
{
espeak_EVENT *i = &g_array_index(spin->events, espeak_EVENT, event);
+ GST_DEBUG("size_to_play=%ld event=%ld "
+ "i->type=%d i->text_position=%d",
+ size_to_play, event, i->type, i->text_position);
+
if (i->type == espeakEVENT_LIST_TERMINATED)
{
GST_DEBUG("i->sample=%d", i->sample*2);
@@ -301,12 +319,79 @@ play(Econtext *self, Espin *spin, gsize size_to_play)
return sample_offset - spin->sound_offset;
}
+ inline gsize mark(Econtext *self, Espin *spin, gsize size_to_play)
+ {
+ if (spin->mark_name)
+ {
+ emit_mark(self, spin->mark_offset, spin->mark_name);
+ spin->mark_offset = -1;
+ spin->mark_name = NULL;
+ }
+
+ gsize spin_size = g_memory_output_stream_get_data_size(spin->sound);
+ size_to_play = MIN(size_to_play, spin_size);
+
+ goffset event;
+ goffset sample_offset = 0;
+ guint mark_offset = 0;
+ const gchar *mark_name = NULL;
+
+ for (event = spin->events_pos; TRUE; ++event)
+ {
+ espeak_EVENT *i = &g_array_index(spin->events, espeak_EVENT, event);
+
+ GST_DEBUG("size_to_play=%ld event=%ld "
+ "i->type=%d i->text_position=%d",
+ size_to_play, event, i->type, i->text_position);
+
+ if (i->type == espeakEVENT_LIST_TERMINATED)
+ {
+ sample_offset = spin_size;
+ break;
+ }
+ else if (i->type == espeakEVENT_MARK)
+ {
+ if (i->sample == 0)
+ {
+ if (spin->sound_offset == 0)
+ emit_mark(self, i->text_position - 1, i->id.name);
+ continue;
+ }
+
+ mark_offset = i->text_position - 1;
+ mark_name = i->id.name;
+ sample_offset = i->sample*2;
+ break;
+ }
+ }
+
+ if (sample_offset - spin->sound_offset > size_to_play)
+ {
+ GST_DEBUG("sample_offset=%ld spin->sound_offset=%ld",
+ sample_offset, spin->sound_offset);
+ return size_to_play;
+ }
+
+ spin->mark_offset = mark_offset;
+ spin->mark_name = mark_name;
+ spin->events_pos = event + 1;
+
+ return sample_offset - spin->sound_offset;
+ }
+
g_atomic_int_set(&spin->state, PLAY);
- if (self->emitter)
- size_to_play = words(self, spin, size_to_play);
- else
- size_to_play = whole(spin, size_to_play);
+ switch (g_atomic_int_get(&self->track))
+ {
+ case ESPEAK_TRACK_WORD:
+ size_to_play = word(self, spin, size_to_play);
+ break;
+ case ESPEAK_TRACK_MARK:
+ size_to_play = mark(self, spin, size_to_play);
+ break;
+ default:
+ size_to_play = whole(spin, size_to_play);
+ }
GstBuffer *out = gst_buffer_new();
GST_BUFFER_DATA(out) =
@@ -397,13 +482,16 @@ synth_cb(short *data, int numsamples, espeak_EVENT *events)
{
g_output_stream_write(espeak_buffer, data, numsamples*2, NULL, NULL);
- for (; events->type != espeakEVENT_LIST_TERMINATED; ++events)
+ if (espeak_events)
{
- GST_DEBUG("type=%d text_position=%d length=%d "
- "audio_position=%d sample=%d",
- events->type, events->text_position, events->length,
- events->audio_position, events->sample*2);
- g_array_append_val(espeak_events, *events);
+ for (; events->type != espeakEVENT_LIST_TERMINATED; ++events)
+ {
+ GST_DEBUG("type=%d text_position=%d length=%d "
+ "audio_position=%d sample=%d",
+ events->type, events->text_position, events->length,
+ events->audio_position, events->sample*2);
+ g_array_append_val(espeak_events, *events);
+ }
}
}
@@ -430,6 +518,8 @@ synth(Econtext *self, Espin *spin)
g_array_set_size(spin->events, 0);
spin->sound_offset = 0;
spin->events_pos = 0;
+ spin->mark_offset = -1;
+ spin->mark_name = NULL;
spin->last_word = -1;
espeak_SetParameter(espeakPITCH, g_atomic_int_get(&self->pitch), 0);
@@ -437,11 +527,17 @@ synth(Econtext *self, Espin *spin)
espeak_SetVoiceByName((gchar*)g_atomic_pointer_get(&self->voice));
espeak_SetParameter(espeakWORDGAP, g_atomic_int_get(&self->gap), 0);
+ gint track = g_atomic_int_get(&self->track);
+
espeak_buffer = G_OUTPUT_STREAM(spin->sound);
- espeak_events = spin->events;
+ espeak_events = track == ESPEAK_TRACK_NONE ? NULL : spin->events;
- espeak_Synth(text, text_len(&spin->text), 0, POS_WORD, 0,
- espeakCHARS_UTF8|espeakPHONEMES, NULL, NULL);
+ gint flags = espeakCHARS_UTF8;
+ if (track == ESPEAK_TRACK_MARK)
+ flags |= espeakSSML;
+
+ espeak_Synth(text, text_len(&spin->text), 0, POS_WORD, 0, flags,
+ NULL, NULL);
espeak_EVENT last_event = { espeakEVENT_LIST_TERMINATED };
last_event.sample = g_memory_output_stream_get_data_size(spin->sound) / 2;
@@ -496,6 +592,12 @@ espeak_set_gap(Econtext *self, guint value)
g_atomic_int_set(&self->gap, value);
}
+void
+espeak_set_track(Econtext *self, guint value)
+{
+ g_atomic_int_set(&self->track, value);
+}
+
// process ----------------------------------------------------------------------
static gpointer
diff --git a/src/espeak.h b/src/espeak.h
index 4fb4fc5..b0e44c2 100644
--- a/src/espeak.h
+++ b/src/espeak.h
@@ -23,6 +23,10 @@
#define ESPEAK_DEFAULT_VOICE "default"
#define ESPEAK_DEFAULT_GAP 0
+#define ESPEAK_TRACK_NONE 0
+#define ESPEAK_TRACK_WORD 1
+#define ESPEAK_TRACK_MARK 2
+
struct _Econtext;
typedef struct _Econtext Econtext;
@@ -35,6 +39,7 @@ void espeak_set_pitch(Econtext*, guint);
void espeak_set_rate(Econtext*, guint);
void espeak_set_voice(Econtext*, const gchar*);
void espeak_set_gap(Econtext*, guint);
+void espeak_set_track(Econtext*, guint);
void espeak_in(Econtext*, const gchar *str);
GstBuffer* espeak_out(Econtext*, gsize size_to_play);
diff --git a/src/gstespeak.c b/src/gstespeak.c
index e095a9a..7a50e7f 100644
--- a/src/gstespeak.c
+++ b/src/gstespeak.c
@@ -48,8 +48,9 @@ enum
PROP_PITCH,
PROP_RATE,
PROP_VOICE,
- PROP_VOICES,
PROP_GAP,
+ PROP_TRACK,
+ PROP_VOICES,
PROP_CAPS
};
@@ -129,6 +130,10 @@ gst_espeak_class_init(GstEspeakClass * klass)
g_param_spec_uint("gap", "Gap",
"Word gap", 0, G_MAXINT, ESPEAK_DEFAULT_GAP,
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
+ g_object_class_install_property(gobject_class, PROP_TRACK,
+ g_param_spec_uint("track", "Track",
+ "Track espeak events", 0, G_MAXINT, ESPEAK_TRACK_NONE,
+ G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
g_object_class_install_property(gobject_class, PROP_VOICES,
g_param_spec_boxed("voices", "List of voices",
"List of voices", G_TYPE_STRV,
@@ -206,6 +211,10 @@ gst_espeak_set_property(GObject *object, guint prop_id,
self->gap = g_value_get_uint(value);
espeak_set_gap(self->speak, self->gap);
break;
+ case PROP_TRACK:
+ self->track = g_value_get_uint(value);
+ espeak_set_track(self->speak, self->track);
+ break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
break;
@@ -231,6 +240,9 @@ gst_espeak_get_property(GObject * object, guint prop_id,
case PROP_GAP:
g_value_set_uint(value, self->gap);
break;
+ case PROP_TRACK:
+ g_value_set_uint(value, self->track);
+ break;
case PROP_VOICES:
g_value_set_boxed(value, self->voices);
break;
diff --git a/src/gstespeak.h b/src/gstespeak.h
index 6759a4c..593ddb2 100644
--- a/src/gstespeak.h
+++ b/src/gstespeak.h
@@ -47,6 +47,7 @@ struct _GstEspeak
guint rate;
gchar *voice;
guint gap;
+ guint track;
gchar **voices;
GstCaps *caps;
gboolean poll;