Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary · refs · log · tree · commit · diff · stats
path: root/src/espeak.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/espeak.c')
-rw-r--r-- src/espeak.c 512
1 files changed, 451 insertions, 61 deletions
diff --git a/src/espeak.c b/src/espeak.c
index f49cd6d..8212af5 100644
--- a/src/espeak.c
+++ b/src/espeak.c
@@ -20,90 +20,406 @@
#include <string.h>
#include <glib.h>
#include <gio/gio.h>
-#include <espeak/speak_lib.h>
#include <gst/gst.h>
+#include <espeak/speak_lib.h>
+
+#define SYNC_BUFFER_SIZE 4096
+
+#define SPIN_QUEUE_SIZE 2
+#define SPIN_FRAME_SIZE 256
-#include "spin.h"
#include "espeak.h"
+#include "text.h"
+
+typedef enum /* Life-cycle state of one Espin slot; values are distinct bits so they can be tested with a mask. */
+{
+ IN = 1, /* slot may receive a new text chunk (in_spinning() only fills slots in this state) */
+ PROCESS = 2, /* a text chunk is stored and the slot awaits synthesis by the worker thread */
+ OUT = 4, /* the worker thread finished synthesis; sound is ready to be handed out */
+ PLAY = 8 /* set by play() once audio from this slot has started to be returned */
+} SpinState;
-struct _Espeak
+typedef enum
{
- Econtext *context;
- guint rate;
- guint pitch;
- const gchar *voice;
+ CLOSE = 1,
+ INPROCESS = 2
+} ContextState;
+
+typedef struct /* One slot of the per-context queue: a text chunk plus the audio and events synthesized from it. */
+{
+ volatile SpinState state; /* read and written with g_atomic_int_* from several threads */
+
+ Text text; /* chunk filled by text_chunk() in in_spinning() */
+
+ GMemoryOutputStream *sound; /* receives the samples written by synth_cb() */
+ goffset sound_offset; /* bytes of `sound` already returned by play() */
+
+ GArray *events; /* espeak_EVENT records collected during synthesis */
+ goffset events_pos; /* index of the next entry of `events` that play() will inspect */
+} Espin;
+
+struct _Econtext
+{
+ volatile ContextState state;
+
+ Espin queue[SPIN_QUEUE_SIZE];
+ Espin *in;
+ Espin *process;
+ Espin *out;
+
+ GSList *in_queue;
+ GSList *process_chunk;
+
+ volatile gint rate;
+ volatile gint pitch;
+ volatile const gchar *voice;
};
+/*
+ * Advance the queue cursor *i by one slot.
+ *
+ * base points at the first of SPIN_QUEUE_SIZE slots; when the cursor would
+ * step past the last slot it wraps around to base.
+ */
+static inline void
+spinning(Espin *base, Espin **i)
+{
+    Espin *next = *i + 1;
+
+    if (next == base + SPIN_QUEUE_SIZE)
+        next = base;
+
+    *i = next;
+}
+
+static void init();
+static void process_push(Econtext*);
+static void process_pop(Econtext*);
+
+static pthread_t process_tid;
+static pthread_mutex_t process_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t process_cond = PTHREAD_COND_INITIALIZER;
+static GSList *process_queue = NULL;
+
static gint espeak_sample_rate = 0;
static const espeak_VOICE **espeak_voices = NULL;
static GOutputStream *espeak_buffer = NULL;
+static GArray *espeak_events = NULL;
-static gint
-synth_cb(short * data, int numsamples, espeak_EVENT * events)
+// -----------------------------------------------------------------------------
+
+Econtext*
+espeak_new()
{
- if (data == NULL)
- return 0;
+ init();
- if (numsamples > 0)
- g_output_stream_write(espeak_buffer, data, numsamples*2, NULL, NULL);
+ Econtext *self = g_new0(Econtext, 1);
+ gint i;
- GST_DEBUG("numsamples=%d data_size=%ld", numsamples*2,
- g_memory_output_stream_get_data_size(G_MEMORY_OUTPUT_STREAM(
- espeak_buffer)));
+ for (i = SPIN_QUEUE_SIZE; i--;)
+ {
+ self->queue[i].state = IN;
+ self->queue[i].sound = G_MEMORY_OUTPUT_STREAM(
+ g_memory_output_stream_new(NULL, 0, realloc, free));
+ self->queue[i].events = g_array_new(FALSE, FALSE, sizeof(espeak_EVENT));
+ }
- return 0;
+ self->in = self->queue;
+ self->process = self->queue;
+ self->out = self->queue;
+
+ self->process_chunk = g_slist_alloc();
+ self->process_chunk->data = self;
+
+ self->pitch = ESPEAK_DEFAULT_PITCH;
+ self->rate = ESPEAK_DEFAULT_RATE;
+ self->voice = ESPEAK_DEFAULT_VOICE;
+
+ GST_DEBUG("[%p]", self);
+
+ return self;
}
-static void
-synth(const gchar *text, GMemoryOutputStream *sound, gpointer self_)
+void
+espeak_unref(Econtext *self)
{
- Espeak *self = (Espeak*)self_;
+ GST_DEBUG("[%p]", self);
- espeak_SetParameter(espeakPITCH, self->pitch, 0);
- espeak_SetParameter(espeakRATE, self->rate, 0);
- espeak_SetVoiceByName(self->voice);
- espeak_buffer = G_OUTPUT_STREAM(sound);
+ g_atomic_int_set(&self->state, g_atomic_int_get(&self->state) | CLOSE);
+ process_pop(self);
- espeak_Synth(text, strlen(text)+1, 0, POS_WORD, 0, espeakCHARS_UTF8,
- NULL, NULL);
+ GST_DEBUG("[%p]", self);
+
+ gint i;
+
+ for (i = SPIN_QUEUE_SIZE; i--;)
+ {
+ g_output_stream_close(G_OUTPUT_STREAM(self->queue[i].sound),
+ NULL, NULL);
+ g_object_unref(self->queue[i].sound);
+ text_unref(&self->queue[i].text);
+ g_array_free(self->queue[i].events, TRUE);
+ }
+
+ if (self->in_queue)
+ {
+ GSList *i;
+ for (i = self->in_queue; i; i = g_slist_next(i))
+ text_unref(i->data);
+ g_slist_free(self->in_queue);
+ }
+
+ g_slist_free(self->process_chunk);
+
+ memset(self, 0, sizeof(Econtext));
+ g_free(self);
}
+// in/out ----------------------------------------------------------------------
+
static void
-init()
+in_spinning(Econtext *self, Text *text)
{
- static volatile gsize initialized = 0;
+ GST_DEBUG("[%p] text.body=%s text.offset=%ld text.frame_len=%ld",
+ self, text->body, text->offset, text->frame_len);
- if (initialized == 0)
+ gboolean chunked = FALSE;
+
+ while (!text_eot(text) && g_atomic_int_get(&self->in->state) == IN)
{
- ++initialized;
- espeak_sample_rate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096,
- NULL, 0);
- espeak_SetSynthCallback(synth_cb);
- espeak_voices = espeak_ListVoices(NULL);
- spin_init(synth);
+ Espin *spin = self->in;
+ text_chunk(text, &spin->text, SPIN_FRAME_SIZE);
+ g_atomic_int_set(&spin->state, PROCESS);
+ spinning(self->queue, &self->in);
+ chunked = TRUE;
}
+
+ int self_status = g_atomic_int_get(&self->state);
+
+ if (chunked && (self_status & INPROCESS) == 0)
+ {
+ g_atomic_int_set(&self->state, self_status | INPROCESS);
+ process_push(self);
+ }
+
+ GST_DEBUG("[%p] text.body=%s text.offset=%ld text.frame_len=%ld",
+ self, text->body, text->offset, text->frame_len);
}
-Espeak*
-espeak_new()
+void
+espeak_in(Econtext *self, const gchar *str_)
{
- init();
+ GST_DEBUG("[%p] str=%s", self, str_);
- Espeak *self = g_new0(Espeak, 1);
- self->context = spin_new(self);
- self->pitch = ESPEAK_DEFAULT_PITCH;
- self->rate = ESPEAK_DEFAULT_RATE;
- self->voice = ESPEAK_DEFAULT_VOICE;
+ if (str_ == NULL || *str_ == 0)
+ return;
- return self;
+ Text *text = text_new(str_);
+
+ if (self->in_queue)
+ {
+ self->in_queue = g_slist_append(self->in_queue, text);
+ return;
+ }
+
+ in_spinning(self, text);
+
+ if (!text_eot(text))
+ {
+ GST_DEBUG("[%p] text_len=%d", self, text_len(text));
+ self->in_queue = g_slist_append(self->in_queue, text);
+ }
}
-void
-espeak_unref(Espeak *self)
+GstBuffer*
+play(Espin *spin, gsize size_to_play, gpointer emitter)
{
- spin_unref(self->context);
- memset(self, 0, sizeof(Espeak));
- g_free(self);
+ inline gsize whole(Espin *spin, gsize size_to_play)
+ {
+ gsize spin_size = g_memory_output_stream_get_data_size(spin->sound);
+ return MIN(size_to_play, spin_size);
+ }
+
+ inline gsize words(Espin *spin, gsize size_to_play, gpointer emitter)
+ {
+ gsize spin_size = g_memory_output_stream_get_data_size(spin->sound);
+ size_to_play = MIN(size_to_play, spin_size);
+
+ GST_DEBUG("spin_size=%ld size_to_play=%ld spin->events_pos=%ld",
+ spin_size, size_to_play, spin->events_pos);
+
+ goffset event;
+ goffset sample_offset = 0;
+ goffset text_offset = -1;
+ gsize text_len = 0;
+
+ for (event = spin->events_pos; TRUE; ++event)
+ {
+ espeak_EVENT *i = &g_array_index(spin->events, espeak_EVENT, event);
+
+ if (i->type == espeakEVENT_LIST_TERMINATED)
+ {
+ GST_DEBUG("i->sample=%d", i->sample*2);
+ sample_offset = spin_size;
+ break;
+ }
+ else if (i->type == espeakEVENT_WORD)
+ {
+ sample_offset = i[1].sample*2;
+ text_offset = spin->text.offset + i->text_position - 1;
+ text_len = i->length;
+
+ GST_DEBUG("sample_offset=%d txt_offset=%d txt_len=%d, txt=%s",
+ sample_offset, text_offset, text_len,
+ spin->text.body + text_offset);
+ break;
+ }
+ }
+
+ if (sample_offset - spin->sound_offset > size_to_play)
+ {
+ GST_DEBUG("sample_offset=%ld spin->sound_offset=%ld",
+ sample_offset, spin->sound_offset);
+ return size_to_play;
+ }
+
+ if (text_offset != -1)
+ {
+ GST_DEBUG("event=%ld", event);
+ g_signal_emit_by_name(emitter, "word",
+ text_offset, text_len, G_TYPE_NONE);
+ spin->events_pos = event + 1;
+ }
+
+ return sample_offset - spin->sound_offset;
+ }
+
+ g_atomic_int_set(&spin->state, PLAY);
+
+ if (emitter)
+ size_to_play = words(spin, size_to_play, emitter);
+ else
+ size_to_play = whole(spin, size_to_play);
+
+ GstBuffer *out = gst_buffer_new();
+ GST_BUFFER_DATA(out) =
+ (guchar*)g_memory_output_stream_get_data(spin->sound) +
+ spin->sound_offset;
+ GST_BUFFER_SIZE(out) = size_to_play;
+
+ spin->sound_offset += size_to_play;
+
+ GST_DEBUG("size_to_play=%ld tell=%ld", size_to_play, spin->sound_offset);
+
+ return out;
+}
+
+/*
+ * Return the next piece of synthesized audio for this context.
+ *
+ * Blocks on process_cond until the slot under the `out` cursor is in the
+ * OUT or PLAY state, or until the context has been flagged CLOSE.
+ *
+ * self         - context to read from
+ * size_to_play - upper bound, in bytes, of the audio to return
+ * emitter      - passed through to play(); when non-NULL play() emits
+ *                "word" signals on it
+ *
+ * Returns the buffer produced by play(), or NULL once the context is
+ * flagged CLOSE.
+ */
+GstBuffer*
+espeak_out(Econtext *self, gsize size_to_play, gpointer emitter)
+{
+    GST_DEBUG("[%p] size_to_play=%" G_GSIZE_FORMAT, self, size_to_play);
+
+    for (;;)
+    {
+        pthread_mutex_lock(&process_lock);
+        while ((g_atomic_int_get(&self->state) & CLOSE) == 0 &&
+               (g_atomic_int_get(&self->out->state) & (PLAY|OUT)) == 0)
+            pthread_cond_wait(&process_cond, &process_lock);
+        pthread_mutex_unlock(&process_lock);
+
+        if (g_atomic_int_get(&self->state) & CLOSE)
+        {
+            GST_DEBUG("[%p]", self);
+            return NULL;
+        }
+
+        Espin *spin = self->out;
+        gsize spin_size = g_memory_output_stream_get_data_size(spin->sound);
+
+        /* goffset is a 64-bit signed integer and gsize is not necessarily
+         * `long`, so use the GLib format macros instead of %ld/%d. */
+        GST_DEBUG("[%p] spin->sound_offset=%" G_GINT64_FORMAT
+                " spin_size=%" G_GSIZE_FORMAT, self,
+                (gint64)spin->sound_offset, spin_size);
+
+        if (g_atomic_int_get(&spin->state) == PLAY &&
+                spin->sound_offset >= spin_size)
+        {
+            /* The slot has been played to the end: recycle it and move on
+             * to the next one. */
+            g_atomic_int_set(&spin->state, IN);
+            text_unref(&spin->text);
+            spinning(self->queue, &self->out);
+
+            /* A slot just became free, so try to feed it with text that
+             * did not fit earlier. */
+            if (self->in_queue)
+            {
+                Text *text = self->in_queue->data;
+                in_spinning(self, text);
+
+                if (text_eot(text))
+                {
+                    self->in_queue = g_slist_delete_link(self->in_queue,
+                            self->in_queue);
+                    GST_DEBUG("[%p] in_queue=%u", self,
+                            g_slist_length(self->in_queue));
+                }
+            }
+
+            GST_DEBUG("[%p]", self);
+
+            continue;
+        }
+
+        return play(spin, size_to_play, emitter);
+    }
+
+    /* not reached */
+    return NULL;
+}
+
+// espeak ----------------------------------------------------------------------
+
+/*
+ * Synthesis callback registered with espeak_SetSynthCallback().
+ *
+ * Appends the delivered samples to espeak_buffer and copies every event up
+ * to (not including) the espeakEVENT_LIST_TERMINATED marker into
+ * espeak_events.  Both globals are set by synth() before synthesis starts.
+ *
+ * data       - sample block, or NULL
+ * numsamples - number of samples in data; numsamples*2 bytes are written
+ * events     - array of events terminated by espeakEVENT_LIST_TERMINATED
+ *
+ * Always returns 0.
+ */
+static gint
+synth_cb(short *data, int numsamples, espeak_EVENT *events)
+{
+    if (data == NULL)
+        return 0;
+
+    if (numsamples > 0)
+    {
+        GError *error = NULL;
+
+        /* Do not drop a write failure silently: the recorded event sample
+         * positions would no longer match the stored audio. */
+        if (g_output_stream_write(espeak_buffer, data, numsamples*2,
+                    NULL, &error) < 0)
+        {
+            GST_WARNING("cannot store synthesized samples: %s",
+                    error ? error->message : "unknown error");
+            g_clear_error(&error);
+        }
+
+        for (; events->type != espeakEVENT_LIST_TERMINATED; ++events)
+        {
+            GST_DEBUG("type=%d text_position=%d length=%d "
+                    "audio_position=%d sample=%d",
+                    events->type, events->text_position, events->length,
+                    events->audio_position, events->sample*2);
+            g_array_append_val(espeak_events, *events);
+        }
+    }
+
+    /* The data size is a gsize, which is not `long` on every platform. */
+    GST_DEBUG("numsamples=%d data_size=%" G_GSIZE_FORMAT, numsamples*2,
+            g_memory_output_stream_get_data_size(G_MEMORY_OUTPUT_STREAM(
+                    espeak_buffer)));
+
+    return 0;
+}
+
+/*
+ * Synthesize the text chunk stored in spin into spin->sound and
+ * spin->events, using the pitch, rate and voice currently set on self.
+ *
+ * The chunk is terminated in place for the duration of the call: the
+ * character at text_last() is replaced by NUL and restored before
+ * returning.  After synthesis an espeakEVENT_LIST_TERMINATED event whose
+ * `sample` is the total data size divided by 2 is appended to
+ * spin->events.
+ */
+static void
+synth(Econtext *self, Espin *spin)
+{
+    gchar *text = text_first(&spin->text);
+    gchar *last = text_last(&spin->text);
+
+    gchar old_last_char = *last;
+    *last = 0;
+
+    /* A pointer difference is a ptrdiff_t; cast it to match %d. */
+    GST_DEBUG("[%p] text='%s' last=%d", self, text, (gint)(last - text));
+
+    /* Reset the slot so that output from a previous chunk is overwritten. */
+    g_seekable_seek(G_SEEKABLE(spin->sound), 0, G_SEEK_SET,
+            NULL, NULL);
+    g_array_set_size(spin->events, 0);
+    spin->sound_offset = 0;
+    spin->events_pos = 0;
+
+    espeak_SetParameter(espeakPITCH, g_atomic_int_get(&self->pitch), 0);
+    espeak_SetParameter(espeakRATE, g_atomic_int_get(&self->rate), 0);
+    espeak_SetVoiceByName((gchar*)g_atomic_pointer_get(&self->voice));
+
+    /* synth_cb() writes into these globals while espeak_Synth() runs. */
+    espeak_buffer = G_OUTPUT_STREAM(spin->sound);
+    espeak_events = spin->events;
+
+    espeak_Synth(text, text_len(&spin->text), 0, POS_WORD, 0,
+            espeakCHARS_UTF8|espeakPHONEMES, NULL, NULL);
+
+    /* Terminate the event list; play() stops scanning at this marker. */
+    espeak_EVENT last_event = { espeakEVENT_LIST_TERMINATED };
+    last_event.sample = g_memory_output_stream_get_data_size(spin->sound) / 2;
+    g_array_append_val(spin->events, last_event);
+
+    *last = old_last_char;
}
gint
@@ -130,31 +446,105 @@ espeak_get_voices()
}
void
-espeak_set_pitch(Espeak *self, guint value)
+espeak_set_pitch(Econtext *self, guint value)
{
- self->pitch = value;
+ g_atomic_int_set(&self->pitch, value);
}
void
-espeak_set_rate(Espeak *self, guint value)
+espeak_set_rate(Econtext *self, guint value)
{
- self->rate = value;
+ g_atomic_int_set(&self->rate, value);
}
void
-espeak_set_voice(Espeak *self, const gchar *value)
+espeak_set_voice(Econtext *self, const gchar *value)
{
- self->voice = value;
+ g_atomic_pointer_set(&self->voice, value);
}
-void
-espeak_say(Espeak *self, const gchar *text)
+// process ----------------------------------------------------------------------
+
+/*
+ * Body of the worker thread started by init().
+ *
+ * Holds process_lock except while waiting on process_cond.  Each pass takes
+ * the head of process_queue, synthesizes the slot under that context's
+ * `process` cursor, marks the slot OUT and advances the cursor.  If the next
+ * slot is already in the PROCESS state the context is queued again;
+ * otherwise its INPROCESS flag is cleared.  When the queue is drained all
+ * waiters on process_cond are woken.
+ *
+ * NOTE(review): the INPROCESS flag is cleared with a separate atomic read
+ * and atomic write, while other threads update the same word - confirm
+ * that no update can be lost.
+ *
+ * data is unused.  The loop never exits, so the trailing unlock and return
+ * are not reached.
+ */
+static void*
+process(void *data)
+{
+    pthread_mutex_lock(&process_lock);
+
+    for (;;)
+    {
+        while (process_queue == NULL)
+            pthread_cond_wait(&process_cond, &process_lock);
+
+        /* The queue is known to be non-empty here. */
+        do
+        {
+            GSList *head = process_queue;
+            Econtext *ctx = (Econtext*)head->data;
+            Espin *job = ctx->process;
+
+            process_queue = g_slist_remove_link(process_queue, head);
+
+            synth(ctx, job);
+
+            g_atomic_int_set(&job->state, OUT);
+            spinning(ctx->queue, &ctx->process);
+
+            gboolean more =
+                g_atomic_int_get(&ctx->process->state) == PROCESS;
+
+            GST_DEBUG("[%p]", ctx);
+
+            if (more)
+            {
+                process_queue = g_slist_concat(process_queue,
+                        ctx->process_chunk);
+            }
+            else
+            {
+                gint flags = g_atomic_int_get(&ctx->state);
+                g_atomic_int_set(&ctx->state, flags & ~INPROCESS);
+            }
+        }
+        while (process_queue != NULL);
+
+        pthread_cond_broadcast(&process_cond);
+    }
+
+    pthread_mutex_unlock(&process_lock);
+
+    return NULL;
+}
+
+static void
+process_push(Econtext *context)
{
- spin_in(self->context, text);
+ pthread_mutex_lock(&process_lock);
+ process_queue = g_slist_concat(process_queue, context->process_chunk);
+ pthread_cond_broadcast(&process_cond);
+ pthread_mutex_unlock(&process_lock);
}
-gpointer
-espeak_hear(Espeak *self, gsize size)
+static void
+process_pop(Econtext *context)
{
- return spin_out(self->context, &size);
+ pthread_mutex_lock(&process_lock);
+ process_queue = g_slist_remove_link(process_queue, context->process_chunk);
+ pthread_mutex_unlock(&process_lock);
+}
+
+// -----------------------------------------------------------------------------
+
+/*
+ * One-time module setup: initialize eSpeak in synchronous mode, register
+ * synth_cb(), cache the voice list and start the detached worker thread
+ * that runs process().
+ *
+ * Safe to call from several threads: g_once_init_enter() lets exactly one
+ * caller run the setup while the others wait until it has finished.
+ *
+ * The pthread calls are made outside g_assert() on purpose.  g_assert()
+ * expands to nothing when G_DISABLE_ASSERT is defined, which would
+ * otherwise remove the calls themselves and leave no worker thread.
+ */
+static void
+init()
+{
+    static volatile gsize initialized = 0;
+
+    if (g_once_init_enter(&initialized))
+    {
+        espeak_sample_rate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,
+                SYNC_BUFFER_SIZE, NULL, 0);
+        espeak_SetSynthCallback(synth_cb);
+        espeak_voices = espeak_ListVoices(NULL);
+
+        pthread_attr_t attr;
+        gint rc;
+
+        rc = pthread_attr_init(&attr);
+        if (rc != 0)
+            g_error("pthread_attr_init failed: %s", g_strerror(rc));
+
+        rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+        if (rc != 0)
+            g_error("pthread_attr_setdetachstate failed: %s",
+                    g_strerror(rc));
+
+        rc = pthread_create(&process_tid, &attr, process, NULL);
+        if (rc != 0)
+            g_error("pthread_create failed: %s", g_strerror(rc));
+
+        rc = pthread_attr_destroy(&attr);
+        if (rc != 0)
+            g_error("pthread_attr_destroy failed: %s", g_strerror(rc));
+
+        g_once_init_leave(&initialized, 1);
+    }
}