diff options
Diffstat (limited to 'src/espeak.c')
-rw-r--r-- | src/espeak.c | 512 |
1 files changed, 451 insertions, 61 deletions
diff --git a/src/espeak.c b/src/espeak.c index f49cd6d..8212af5 100644 --- a/src/espeak.c +++ b/src/espeak.c @@ -20,90 +20,406 @@ #include <string.h> #include <glib.h> #include <gio/gio.h> -#include <espeak/speak_lib.h> #include <gst/gst.h> +#include <espeak/speak_lib.h> + +#define SYNC_BUFFER_SIZE 4096 + +#define SPIN_QUEUE_SIZE 2 +#define SPIN_FRAME_SIZE 256 -#include "spin.h" #include "espeak.h" +#include "text.h" + +typedef enum +{ + IN = 1, + PROCESS = 2, + OUT = 4, + PLAY = 8 +} SpinState; -struct _Espeak +typedef enum { - Econtext *context; - guint rate; - guint pitch; - const gchar *voice; + CLOSE = 1, + INPROCESS = 2 +} ContextState; + +typedef struct +{ + volatile SpinState state; + + Text text; + + GMemoryOutputStream *sound; + goffset sound_offset; + + GArray *events; + goffset events_pos; +} Espin; + +struct _Econtext +{ + volatile ContextState state; + + Espin queue[SPIN_QUEUE_SIZE]; + Espin *in; + Espin *process; + Espin *out; + + GSList *in_queue; + GSList *process_chunk; + + volatile gint rate; + volatile gint pitch; + volatile const gchar *voice; }; +static inline void +spinning(Espin *base, Espin **i) +{ + if (++(*i) == base + SPIN_QUEUE_SIZE) + *i = base; +} + +static void init(); +static void process_push(Econtext*); +static void process_pop(Econtext*); + +static pthread_t process_tid; +static pthread_mutex_t process_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t process_cond = PTHREAD_COND_INITIALIZER; +static GSList *process_queue = NULL; + static gint espeak_sample_rate = 0; static const espeak_VOICE **espeak_voices = NULL; static GOutputStream *espeak_buffer = NULL; +static GArray *espeak_events = NULL; -static gint -synth_cb(short * data, int numsamples, espeak_EVENT * events) +// ----------------------------------------------------------------------------- + +Econtext* +espeak_new() { - if (data == NULL) - return 0; + init(); - if (numsamples > 0) - g_output_stream_write(espeak_buffer, data, numsamples*2, NULL, NULL); + Econtext *self = g_new0(Econtext, 1); + gint i; - GST_DEBUG("numsamples=%d data_size=%ld", numsamples*2, - g_memory_output_stream_get_data_size(G_MEMORY_OUTPUT_STREAM( - espeak_buffer))); + for (i = SPIN_QUEUE_SIZE; i--;) + { + self->queue[i].state = IN; + self->queue[i].sound = G_MEMORY_OUTPUT_STREAM( + g_memory_output_stream_new(NULL, 0, realloc, free)); + self->queue[i].events = g_array_new(FALSE, FALSE, sizeof(espeak_EVENT)); + } - return 0; + self->in = self->queue; + self->process = self->queue; + self->out = self->queue; + + self->process_chunk = g_slist_alloc(); + self->process_chunk->data = self; + + self->pitch = ESPEAK_DEFAULT_PITCH; + self->rate = ESPEAK_DEFAULT_RATE; + self->voice = ESPEAK_DEFAULT_VOICE; + + GST_DEBUG("[%p]", self); + + return self; } -static void -synth(const gchar *text, GMemoryOutputStream *sound, gpointer self_) +void +espeak_unref(Econtext *self) { - Espeak *self = (Espeak*)self_; + GST_DEBUG("[%p]", self); - espeak_SetParameter(espeakPITCH, self->pitch, 0); - espeak_SetParameter(espeakRATE, self->rate, 0); - espeak_SetVoiceByName(self->voice); - espeak_buffer = G_OUTPUT_STREAM(sound); + g_atomic_int_set(&self->state, g_atomic_int_get(&self->state) | CLOSE); + process_pop(self); - espeak_Synth(text, strlen(text)+1, 0, POS_WORD, 0, espeakCHARS_UTF8, - NULL, NULL); + GST_DEBUG("[%p]", self); + + gint i; + + for (i = SPIN_QUEUE_SIZE; i--;) + { + g_output_stream_close(G_OUTPUT_STREAM(self->queue[i].sound), + NULL, NULL); + g_object_unref(self->queue[i].sound); + text_unref(&self->queue[i].text); + g_array_free(self->queue[i].events, TRUE); + } + + if (self->in_queue) + { + GSList *i; + for (i = self->in_queue; i; i = g_slist_next(i)) + text_unref(i->data); + g_slist_free(self->in_queue); + } + + g_slist_free(self->process_chunk); + + memset(self, 0, sizeof(Econtext)); + g_free(self); } +// in/out ---------------------------------------------------------------------- + static void -init() +in_spinning(Econtext *self, Text *text) { - static volatile gsize initialized = 0; + GST_DEBUG("[%p] text.body=%s text.offset=%ld text.frame_len=%ld", + self, text->body, text->offset, text->frame_len); - if (initialized == 0) + gboolean chunked = FALSE; + + while (!text_eot(text) && g_atomic_int_get(&self->in->state) == IN) { - ++initialized; - espeak_sample_rate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096, - NULL, 0); - espeak_SetSynthCallback(synth_cb); - espeak_voices = espeak_ListVoices(NULL); - spin_init(synth); + Espin *spin = self->in; + text_chunk(text, &spin->text, SPIN_FRAME_SIZE); + g_atomic_int_set(&spin->state, PROCESS); + spinning(self->queue, &self->in); + chunked = TRUE; } + + int self_status = g_atomic_int_get(&self->state); + + if (chunked && (self_status & INPROCESS) == 0) + { + g_atomic_int_set(&self->state, self_status | INPROCESS); + process_push(self); + } + + GST_DEBUG("[%p] text.body=%s text.offset=%ld text.frame_len=%ld", + self, text->body, text->offset, text->frame_len); } -Espeak* -espeak_new() +void +espeak_in(Econtext *self, const gchar *str_) { - init(); + GST_DEBUG("[%p] str=%s", self, str_); - Espeak *self = g_new0(Espeak, 1); - self->context = spin_new(self); - self->pitch = ESPEAK_DEFAULT_PITCH; - self->rate = ESPEAK_DEFAULT_RATE; - self->voice = ESPEAK_DEFAULT_VOICE; + if (str_ == NULL || *str_ == 0) + return; - return self; + Text *text = text_new(str_); + + if (self->in_queue) + { + self->in_queue = g_slist_append(self->in_queue, text); + return; + } + + in_spinning(self, text); + + if (!text_eot(text)) + { + GST_DEBUG("[%p] text_len=%d", self, text_len(text)); + self->in_queue = g_slist_append(self->in_queue, text); + } } -void -espeak_unref(Espeak *self) +GstBuffer* +play(Espin *spin, gsize size_to_play, gpointer emitter) { - spin_unref(self->context); - memset(self, 0, sizeof(Espeak)); - g_free(self); + inline gsize whole(Espin *spin, gsize size_to_play) + { + gsize spin_size = g_memory_output_stream_get_data_size(spin->sound); + return MIN(size_to_play, spin_size); + } + + inline gsize words(Espin *spin, gsize size_to_play, gpointer emitter) + { + gsize spin_size = g_memory_output_stream_get_data_size(spin->sound); + size_to_play = MIN(size_to_play, spin_size); + + GST_DEBUG("spin_size=%ld size_to_play=%ld spin->events_pos=%ld", + spin_size, size_to_play, spin->events_pos); + + goffset event; + goffset sample_offset = 0; + goffset text_offset = -1; + gsize text_len = 0; + + for (event = spin->events_pos; TRUE; ++event) + { + espeak_EVENT *i = &g_array_index(spin->events, espeak_EVENT, event); + + if (i->type == espeakEVENT_LIST_TERMINATED) + { + GST_DEBUG("i->sample=%d", i->sample*2); + sample_offset = spin_size; + break; + } + else if (i->type == espeakEVENT_WORD) + { + sample_offset = i[1].sample*2; + text_offset = spin->text.offset + i->text_position - 1; + text_len = i->length; + + GST_DEBUG("sample_offset=%d txt_offset=%d txt_len=%d, txt=%s", + sample_offset, text_offset, text_len, + spin->text.body + text_offset); + break; + } + } + + if (sample_offset - spin->sound_offset > size_to_play) + { + GST_DEBUG("sample_offset=%ld spin->sound_offset=%ld", + sample_offset, spin->sound_offset); + return size_to_play; + } + + if (text_offset != -1) + { + GST_DEBUG("event=%ld", event); + g_signal_emit_by_name(emitter, "word", + text_offset, text_len, G_TYPE_NONE); + spin->events_pos = event + 1; + } + + return sample_offset - spin->sound_offset; + } + + g_atomic_int_set(&spin->state, PLAY); + + if (emitter) + size_to_play = words(spin, size_to_play, emitter); + else + size_to_play = whole(spin, size_to_play); + + GstBuffer *out = gst_buffer_new(); + GST_BUFFER_DATA(out) = + (guchar*)g_memory_output_stream_get_data(spin->sound) + + spin->sound_offset; + GST_BUFFER_SIZE(out) = size_to_play; + + spin->sound_offset += size_to_play; + + GST_DEBUG("size_to_play=%ld tell=%ld", size_to_play, spin->sound_offset); + + return out; +} + +GstBuffer* +espeak_out(Econtext *self, gsize size_to_play, gpointer emitter) +{ + GST_DEBUG("[%p] size_to_play=%d", self, size_to_play); + + for (;;) + { + pthread_mutex_lock(&process_lock); + while ((g_atomic_int_get(&self->state) & CLOSE) == 0 && + (g_atomic_int_get(&self->out->state) & (PLAY|OUT)) == 0) + pthread_cond_wait(&process_cond, &process_lock); + pthread_mutex_unlock(&process_lock); + + if (g_atomic_int_get(&self->state) & CLOSE) + { + GST_DEBUG("[%p]", self); + return NULL; + } + + Espin *spin = self->out; + gsize spin_size = g_memory_output_stream_get_data_size(spin->sound); + + GST_DEBUG("[%p] spin->sound_offset=%ld spin_size=%ld", self, + spin->sound_offset, spin_size); + + if (g_atomic_int_get(&spin->state) == PLAY && + spin->sound_offset >= spin_size) + { + g_atomic_int_set(&spin->state, IN); + text_unref(&spin->text); + spinning(self->queue, &self->out); + + if (self->in_queue) + { + Text *text = self->in_queue->data; + in_spinning(self, text); + + if (text_eot(text)) + { + self->in_queue = g_slist_delete_link(self->in_queue, + self->in_queue); + GST_DEBUG("[%p] in_queue=%d", self, + g_slist_length(self->in_queue)); + } + } + + GST_DEBUG("[%p]", self); + + continue; + } + + return play(spin, size_to_play, emitter); + } + + return NULL; +} + +// espeak ---------------------------------------------------------------------- + +static gint +synth_cb(short *data, int numsamples, espeak_EVENT *events) +{ + if (data == NULL) + return 0; + + if (numsamples > 0) + { + g_output_stream_write(espeak_buffer, data, numsamples*2, NULL, NULL); + + for (; events->type != espeakEVENT_LIST_TERMINATED; ++events) + { + GST_DEBUG("type=%d text_position=%d length=%d " + "audio_position=%d sample=%d", + events->type, events->text_position, events->length, + events->audio_position, events->sample*2); + g_array_append_val(espeak_events, *events); + } + } + + GST_DEBUG("numsamples=%d data_size=%ld", numsamples*2, + g_memory_output_stream_get_data_size(G_MEMORY_OUTPUT_STREAM( + espeak_buffer))); + + return 0; +} + +static void +synth(Econtext *self, Espin *spin) +{ + gchar *text = text_first(&spin->text); + gchar *last = text_last(&spin->text); + + gchar old_last_char = *last; + *last = 0; + + GST_DEBUG("[%p] text='%s' last=%d", self, text, last-text); + + g_seekable_seek(G_SEEKABLE(spin->sound), 0, G_SEEK_SET, + NULL, NULL); + g_array_set_size(spin->events, 0); + spin->sound_offset = 0; + spin->events_pos = 0; + + espeak_SetParameter(espeakPITCH, g_atomic_int_get(&self->pitch), 0); + espeak_SetParameter(espeakRATE, g_atomic_int_get(&self->rate), 0); + espeak_SetVoiceByName((gchar*)g_atomic_pointer_get(&self->voice)); + espeak_buffer = G_OUTPUT_STREAM(spin->sound); + espeak_events = spin->events; + + espeak_Synth(text, text_len(&spin->text), 0, POS_WORD, 0, + espeakCHARS_UTF8|espeakPHONEMES, NULL, NULL); + + espeak_EVENT last_event = { espeakEVENT_LIST_TERMINATED }; + last_event.sample = g_memory_output_stream_get_data_size(spin->sound) / 2; + g_array_append_val(spin->events, last_event); + *last = old_last_char; } gint @@ -130,31 +446,105 @@ espeak_get_voices() } void -espeak_set_pitch(Espeak *self, guint value) +espeak_set_pitch(Econtext *self, guint value) { - self->pitch = value; + g_atomic_int_set(&self->pitch, value); } void -espeak_set_rate(Espeak *self, guint value) +espeak_set_rate(Econtext *self, guint value) { - self->rate = value; + g_atomic_int_set(&self->rate, value); } void -espeak_set_voice(Espeak *self, const gchar *value) +espeak_set_voice(Econtext *self, const gchar *value) { - self->voice = value; + g_atomic_pointer_set(&self->voice, value); } -void -espeak_say(Espeak *self, const gchar *text) +// process ---------------------------------------------------------------------- + +static void* +process(void *data) +{ + pthread_mutex_lock(&process_lock); + + for (;;) + { + while (process_queue == NULL) + pthread_cond_wait(&process_cond, &process_lock); + + while (process_queue) + { + Econtext *context = (Econtext*)process_queue->data; + Espin *spin = context->process; + + process_queue = g_slist_remove_link(process_queue, process_queue); + + synth(context, spin); + + g_atomic_int_set(&spin->state, OUT); + spinning(context->queue, &context->process); + + if (g_atomic_int_get(&context->process->state) == PROCESS) + { + GST_DEBUG("[%p]", context); + process_queue = g_slist_concat(process_queue, + context->process_chunk); + } + else + { + GST_DEBUG("[%p]", context); + g_atomic_int_set(&context->state, + g_atomic_int_get(&context->state) & ~INPROCESS); + } + } + + pthread_cond_broadcast(&process_cond); + } + + pthread_mutex_unlock(&process_lock); + + return NULL; +} + +static void +process_push(Econtext *context) { - spin_in(self->context, text); + pthread_mutex_lock(&process_lock); + process_queue = g_slist_concat(process_queue, context->process_chunk); + pthread_cond_broadcast(&process_cond); + pthread_mutex_unlock(&process_lock); } -gpointer -espeak_hear(Espeak *self, gsize size) +static void +process_pop(Econtext *context) { - return spin_out(self->context, &size); + pthread_mutex_lock(&process_lock); + process_queue = g_slist_remove_link(process_queue, context->process_chunk); + pthread_mutex_unlock(&process_lock); +} + +// ----------------------------------------------------------------------------- + +static void +init() +{ + static volatile gsize initialized = 0; + + if (initialized == 0) + { + ++initialized; + espeak_sample_rate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, + SYNC_BUFFER_SIZE, NULL, 0); + espeak_SetSynthCallback(synth_cb); + espeak_voices = espeak_ListVoices(NULL); + + pthread_attr_t attr; + g_assert(pthread_attr_init(&attr) == 0); + g_assert(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) == 0); + g_assert(pthread_create(&process_tid, &attr, process, NULL) == 0); + g_assert(pthread_attr_destroy(&attr) == 0); + } } |