1 files changed, 451 insertions, 61 deletions
diff --git a/src/espeak.c b/src/espeak.c
index f49cd6d..8212af5 100644
--- a/src/espeak.c
+++ b/src/espeak.c
@@ -20,90 +20,406 @@
 #include <string.h>
 #include <glib.h>
 #include <gio/gio.h>
-#include <espeak/speak_lib.h>
 #include <gst/gst.h>
+#include <espeak/speak_lib.h>
+
+#define SYNC_BUFFER_SIZE 4096
+
+#define SPIN_QUEUE_SIZE 2
+#define SPIN_FRAME_SIZE 256
 
-#include "spin.h"
 #include "espeak.h"
+#include "text.h"
+
+typedef enum
+{
+    IN      = 1,    /* slot is free; in_spinning() may store a text chunk */
+    PROCESS = 2,    /* chunk stored, waiting for process() to synthesize it */
+    OUT     = 4,    /* synthesized; espeak_out() may start playing it */
+    PLAY    = 8     /* play() has begun handing out this slot's sound */
+} SpinState;
 
-struct _Espeak
+typedef enum
 {
-    Econtext *context;
-    guint rate;
-    guint pitch;
-    const gchar *voice;
+    CLOSE     = 1,  /* set by espeak_unref(); espeak_out() then returns NULL */
+    INPROCESS = 2   /* queued for process(); cleared once no PROCESS slot remains */
+} ContextState;
+
+typedef struct
+{
+    volatile SpinState state;   /* accessed through g_atomic_int_get/set */
+
+    Text text;                  /* chunk filled in by text_chunk() */
+
+    GMemoryOutputStream *sound; /* samples written by synth_cb() */
+    goffset sound_offset;       /* bytes of sound already returned by play() */
+
+    GArray *events;             /* espeak_EVENT items collected while synthesizing */
+    goffset events_pos;         /* index where words() resumes scanning events */
+} Espin;
+
+struct _Econtext
+{
+    volatile ContextState state;    /* bit set of CLOSE / INPROCESS */
+
+    Espin queue[SPIN_QUEUE_SIZE];   /* ring of slots */
+    Espin *in;                      /* next slot in_spinning() fills */
+    Espin *process;                 /* next slot process() synthesizes */
+    Espin *out;                     /* slot espeak_out() is draining */
+
+    GSList *in_queue;               /* texts not yet fully chunked into slots */
+    GSList *process_chunk;          /* list node linking self into process_queue */
+
+    volatile gint rate;             /* applied by synth() via espeakRATE */
+    volatile gint pitch;            /* applied by synth() via espeakPITCH */
+    volatile const gchar *voice;    /* pointer only; given to espeak_SetVoiceByName() */
 };
 
+static inline void
+spinning(Espin *base, Espin **i)    /* step *i to the next ring slot */
+{
+    if (++(*i) == base + SPIN_QUEUE_SIZE)
+        *i = base;                  /* ran past the last slot: wrap to the first */
+}
+
+static void init();
+static void process_push(Econtext*);
+static void process_pop(Econtext*);
+
+static pthread_t process_tid;       /* thread created by init(); runs process() */
+static pthread_mutex_t process_lock = PTHREAD_MUTEX_INITIALIZER; /* taken around process_queue updates */
+static pthread_cond_t process_cond = PTHREAD_COND_INITIALIZER;   /* waited on by process() and espeak_out() */
+static GSList *process_queue = NULL; /* process_chunk nodes of contexts awaiting synthesis */
+
 static gint espeak_sample_rate = 0;
 static const espeak_VOICE **espeak_voices = NULL;
 static GOutputStream *espeak_buffer = NULL;
+static GArray *espeak_events = NULL; /* array synth_cb() appends to; set by synth() */
 
-static gint
-synth_cb(short * data, int numsamples, espeak_EVENT * events)
+// -----------------------------------------------------------------------------
+
+Econtext*
+espeak_new()    /* allocate a context whose slots are all free (IN) */
 {
-    if (data == NULL)
-        return 0;
+    init();     /* guarded set-up of espeak and the process() thread */
 
-    if (numsamples > 0)
-        g_output_stream_write(espeak_buffer, data, numsamples*2, NULL, NULL);
+    Econtext *self = g_new0(Econtext, 1);
+    gint i;
 
-    GST_DEBUG("numsamples=%d data_size=%ld", numsamples*2,
-            g_memory_output_stream_get_data_size(G_MEMORY_OUTPUT_STREAM(
-                    espeak_buffer)));
+    for (i = SPIN_QUEUE_SIZE; i--;)     /* visits every slot, last to first */
+    {
+        self->queue[i].state = IN;
+        self->queue[i].sound = G_MEMORY_OUTPUT_STREAM(
+                g_memory_output_stream_new(NULL, 0, realloc, free));
+        self->queue[i].events = g_array_new(FALSE, FALSE, sizeof(espeak_EVENT));
+    }
 
-    return 0;
+    self->in = self->queue;
+    self->process = self->queue;
+    self->out = self->queue;
+    /* list node owned by the context; used to link it into process_queue */
+    self->process_chunk = g_slist_alloc();
+    self->process_chunk->data = self;
+
+    self->pitch = ESPEAK_DEFAULT_PITCH;
+    self->rate = ESPEAK_DEFAULT_RATE;
+    self->voice = ESPEAK_DEFAULT_VOICE;
+
+    GST_DEBUG("[%p]", self);
+
+    return self;
 }
 
-static void
-synth(const gchar *text, GMemoryOutputStream *sound, gpointer self_)
+void
+espeak_unref(Econtext *self)    /* flag CLOSE, unqueue, then free everything */
 {
-    Espeak *self = (Espeak*)self_;
+    GST_DEBUG("[%p]", self);
 
-    espeak_SetParameter(espeakPITCH, self->pitch, 0);
-    espeak_SetParameter(espeakRATE, self->rate, 0);
-    espeak_SetVoiceByName(self->voice);
-    espeak_buffer = G_OUTPUT_STREAM(sound);
+    g_atomic_int_set(&self->state, g_atomic_int_get(&self->state) | CLOSE);
+    process_pop(self);      /* takes process_lock to unlink process_chunk */
 
-    espeak_Synth(text, strlen(text)+1, 0, POS_WORD, 0, espeakCHARS_UTF8,
-            NULL, NULL);
+    GST_DEBUG("[%p]", self);
+    /* NOTE(review): nothing signals process_cond for CLOSE - confirm waiters */
+    gint i;
+
+    for (i = SPIN_QUEUE_SIZE; i--;)
+    {
+        g_output_stream_close(G_OUTPUT_STREAM(self->queue[i].sound),
+                NULL, NULL);
+        g_object_unref(self->queue[i].sound);
+        text_unref(&self->queue[i].text);
+        g_array_free(self->queue[i].events, TRUE);
+    }
+    /* texts still held in in_queue are released as well */
+    if (self->in_queue)
+    {
+        GSList *i;
+        for (i = self->in_queue; i; i = g_slist_next(i))
+            text_unref(i->data);
+        g_slist_free(self->in_queue);
+    }
+
+    g_slist_free(self->process_chunk);
+
+    memset(self, 0, sizeof(Econtext));
+    g_free(self);
 }
 
+// in/out ----------------------------------------------------------------------
+
 static void
-init()
+in_spinning(Econtext *self, Text *text)     /* move text into free slots */
 {
-    static volatile gsize initialized = 0;
+    GST_DEBUG("[%p] text.body=%s text.offset=%ld text.frame_len=%ld",
+            self, text->body, text->offset, text->frame_len);
 
-    if (initialized == 0)
+    gboolean chunked = FALSE;
+    /* hand one text_chunk() (SPIN_FRAME_SIZE argument) to each free slot */
+    while (!text_eot(text) && g_atomic_int_get(&self->in->state) == IN)
     {
-        ++initialized;
-        espeak_sample_rate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096,
-                NULL, 0);
-        espeak_SetSynthCallback(synth_cb);
-        espeak_voices = espeak_ListVoices(NULL);
-        spin_init(synth);
+        Espin *spin = self->in;
+        text_chunk(text, &spin->text, SPIN_FRAME_SIZE);
+        g_atomic_int_set(&spin->state, PROCESS);
+        spinning(self->queue, &self->in);
+        chunked = TRUE;
     }
+    /* queue the context for process() unless it is already flagged INPROCESS */
+    int self_status = g_atomic_int_get(&self->state);
+
+    if (chunked && (self_status & INPROCESS) == 0)
+    {
+        g_atomic_int_set(&self->state, self_status | INPROCESS);
+        process_push(self);
+    }
+
+    GST_DEBUG("[%p] text.body=%s text.offset=%ld text.frame_len=%ld",
+            self, text->body, text->offset, text->frame_len);
 }
 
-Espeak*
-espeak_new()
+void
+espeak_in(Econtext *self, const gchar *str_)    /* feed text to be spoken */
 {
-    init();
+    GST_DEBUG("[%p] str=%s", self, str_);
 
-    Espeak *self = g_new0(Espeak, 1);
-    self->context = spin_new(self);
-    self->pitch = ESPEAK_DEFAULT_PITCH;
-    self->rate = ESPEAK_DEFAULT_RATE;
-    self->voice = ESPEAK_DEFAULT_VOICE;
+    if (str_ == NULL || *str_ == 0)
+        return;
 
-    return self;
+    Text *text = text_new(str_);
+    /* earlier text is still pending: keep order by queueing behind it */
+    if (self->in_queue)
+    {
+        self->in_queue = g_slist_append(self->in_queue, text);
+        return;
+    }
+
+    in_spinning(self, text);
+    /* whatever did not fit into the free slots waits for espeak_out() */
+    if (!text_eot(text))
+    {
+        GST_DEBUG("[%p] text_len=%d", self, text_len(text));
+        self->in_queue = g_slist_append(self->in_queue, text);
+    }
 }
 
-void
-espeak_unref(Espeak *self)
+/* Mark the slot as PLAY and wrap its next portion of sound in a GstBuffer.
+ * With an emitter, a portion ends at a word boundary and a "word" signal is
+ * emitted; the buffer points into spin->sound instead of copying from it. */
+GstBuffer*
+play(Espin *spin, gsize size_to_play, gpointer emitter)
 {
-    spin_unref(self->context);
-    memset(self, 0, sizeof(Espeak));
-    g_free(self);
+    inline gsize whole(Espin *spin, gsize size_to_play)
+    {
+        gsize spin_size = g_memory_output_stream_get_data_size(spin->sound);
+        /* clamp to what is left past sound_offset, not to the whole stream */
+        return MIN(size_to_play, spin_size - (gsize)spin->sound_offset);
+    }
+    /* words(): stop at the end of the next word and emit "word" when it fits */
+    inline gsize words(Espin *spin, gsize size_to_play, gpointer emitter)
+    {
+        gsize spin_size = g_memory_output_stream_get_data_size(spin->sound);
+        size_to_play = MIN(size_to_play, spin_size);
+
+        GST_DEBUG("spin_size=%ld size_to_play=%ld spin->events_pos=%ld",
+                (glong)spin_size, (glong)size_to_play, (glong)spin->events_pos);
+
+        goffset event;
+        goffset sample_offset = 0;
+        goffset text_offset = -1;
+        gsize text_len = 0;
+
+        for (event = spin->events_pos; TRUE; ++event)
+        {
+            espeak_EVENT *i = &g_array_index(spin->events, espeak_EVENT, event);
+
+            if (i->type == espeakEVENT_LIST_TERMINATED)
+            {
+                GST_DEBUG("i->sample=%d", i->sample*2);
+                sample_offset = spin_size;
+                break;
+            }
+            else if (i->type == espeakEVENT_WORD)
+            {
+                sample_offset = i[1].sample*2;
+                text_offset = spin->text.offset + i->text_position - 1;
+                text_len = i->length;
+                GST_DEBUG("sample_offset=%ld txt_offset=%ld txt_len=%ld, txt=%s",
+                        (glong)sample_offset, (glong)text_offset,
+                        (glong)text_len, spin->text.body + text_offset);
+                break;
+            }
+        }
+
+        if (sample_offset - spin->sound_offset > size_to_play)
+        {
+            GST_DEBUG("sample_offset=%ld spin->sound_offset=%ld",
+                    (glong)sample_offset, (glong)spin->sound_offset);
+            return size_to_play;
+        }
+
+        if (text_offset != -1)
+        {
+            GST_DEBUG("event=%ld", (glong)event);
+            g_signal_emit_by_name(emitter, "word",
+                    text_offset, text_len, G_TYPE_NONE);
+            spin->events_pos = event + 1;
+        }
+
+        return sample_offset - spin->sound_offset;
+    }
+
+    g_atomic_int_set(&spin->state, PLAY);
+
+    size_to_play = emitter ? words(spin, size_to_play, emitter)
+                           : whole(spin, size_to_play);
+
+    GstBuffer *out = gst_buffer_new();
+    GST_BUFFER_DATA(out) = (guchar*)g_memory_output_stream_get_data(
+            spin->sound) + spin->sound_offset;
+    GST_BUFFER_SIZE(out) = size_to_play;
+
+    spin->sound_offset += size_to_play;
+
+    GST_DEBUG("size_to_play=%ld tell=%ld",
+            (glong)size_to_play, (glong)spin->sound_offset);
+    return out;
+}
+
+/* Return the next buffer of synthesized sound for the context, blocking on
+ * process_cond until the current slot is in OUT or PLAY state.  Returns NULL
+ * once the context has been flagged CLOSE. */
+GstBuffer*
+espeak_out(Econtext *self, gsize size_to_play, gpointer emitter)
+{
+    GST_DEBUG("[%p] size_to_play=%ld", self, (glong)size_to_play);
+
+    for (;;)
+    {
+        pthread_mutex_lock(&process_lock);
+            while ((g_atomic_int_get(&self->state) & CLOSE) == 0 &&
+                    (g_atomic_int_get(&self->out->state) & (PLAY|OUT)) == 0)
+                pthread_cond_wait(&process_cond, &process_lock);
+        pthread_mutex_unlock(&process_lock);
+
+        if (g_atomic_int_get(&self->state) & CLOSE)
+        {
+            GST_DEBUG("[%p]", self);
+            return NULL;
+        }
+
+        Espin *spin = self->out;
+        gsize spin_size = g_memory_output_stream_get_data_size(spin->sound);
+        /* casts keep the arguments in step with %ld on every platform */
+        GST_DEBUG("[%p] spin->sound_offset=%ld spin_size=%ld", self,
+                (glong)spin->sound_offset, (glong)spin_size);
+
+        if (g_atomic_int_get(&spin->state) == PLAY &&
+                spin->sound_offset >= spin_size)
+        {
+            /* slot fully played: free it, advance, refill from pending text */
+            g_atomic_int_set(&spin->state, IN);
+            text_unref(&spin->text);
+            spinning(self->queue, &self->out);
+
+            if (self->in_queue)
+            {
+                Text *text = self->in_queue->data;
+                in_spinning(self, text);
+                if (text_eot(text))
+                {
+                    self->in_queue = g_slist_delete_link(self->in_queue,
+                            self->in_queue);
+                    GST_DEBUG("[%p] in_queue=%u", self,
+                            g_slist_length(self->in_queue));
+                }
+            }
+            GST_DEBUG("[%p]", self);
+            continue;
+        }
+
+        return play(spin, size_to_play, emitter);
+    }
+    return NULL;
+}
+
+// espeak ----------------------------------------------------------------------
+
+/* espeak synthesis callback: append the samples to espeak_buffer and copy the
+ * reported events to espeak_events (both are set by synth()).  Always returns
+ * 0, which in the espeak callback contract means "continue synthesis". */
+static gint
+synth_cb(short *data, int numsamples, espeak_EVENT *events)
+{
+    if (data == NULL)
+        return 0;
+
+    if (numsamples > 0)
+        g_output_stream_write(espeak_buffer, data, numsamples*2, NULL, NULL);
+    /* espeak may deliver events with zero samples, so always collect them */
+    for (; events->type != espeakEVENT_LIST_TERMINATED; ++events)
+    {
+        GST_DEBUG("type=%d text_position=%d length=%d "
+                  "audio_position=%d sample=%d",
+                events->type, events->text_position, events->length,
+                events->audio_position, events->sample*2);
+        g_array_append_val(espeak_events, *events);
+    }
+
+    GST_DEBUG("numsamples=%d data_size=%ld", numsamples*2,
+            (glong)g_memory_output_stream_get_data_size(
+                    G_MEMORY_OUTPUT_STREAM(espeak_buffer)));
+    return 0;
+}
+
+/* Synthesize the slot's chunk into spin->sound / spin->events; the chunk is
+ * NUL-terminated in place for espeak and the overwritten char is restored. */
+static void
+synth(Econtext *self, Espin *spin)
+{
+    gchar *text = text_first(&spin->text);
+    gchar *last = text_last(&spin->text);
+    gchar old_last_char = *last;
+    *last = 0;
+
+    GST_DEBUG("[%p] text='%s' last=%d", self, text, (gint)(last-text));
+    /* NOTE(review): seek does not truncate - confirm data_size resets on reuse */
+    g_seekable_seek(G_SEEKABLE(spin->sound), 0, G_SEEK_SET, NULL, NULL);
+    g_array_set_size(spin->events, 0);
+    spin->sound_offset = 0;
+    spin->events_pos = 0;
+
+    espeak_SetParameter(espeakPITCH, g_atomic_int_get(&self->pitch), 0);
+    espeak_SetParameter(espeakRATE, g_atomic_int_get(&self->rate), 0);
+    espeak_SetVoiceByName((gchar*)g_atomic_pointer_get(&self->voice));
+    espeak_buffer = G_OUTPUT_STREAM(spin->sound);
+    espeak_events = spin->events;
+
+    espeak_Synth(text, text_len(&spin->text), 0, POS_WORD, 0,
+            espeakCHARS_UTF8|espeakPHONEMES, NULL, NULL);
+
+    espeak_EVENT last_event = { espeakEVENT_LIST_TERMINATED };
+    last_event.sample = g_memory_output_stream_get_data_size(spin->sound) / 2;
+    g_array_append_val(spin->events, last_event);
+    *last = old_last_char;
 }
 
 gint
@@ -130,31 +446,105 @@ espeak_get_voices()
 }
 
 void
-espeak_set_pitch(Espeak *self, guint value)
+espeak_set_pitch(Econtext *self, guint value)
 {
-    self->pitch = value;
+    g_atomic_int_set(&self->pitch, value);  /* synth() reads it per chunk */
 }
 
 void
-espeak_set_rate(Espeak *self, guint value)
+espeak_set_rate(Econtext *self, guint value)
 {
-    self->rate = value;
+    g_atomic_int_set(&self->rate, value);   /* synth() reads it per chunk */
 }
 
 void
-espeak_set_voice(Espeak *self, const gchar *value)
+espeak_set_voice(Econtext *self, const gchar *value)
 {
-    self->voice = value;
+    g_atomic_pointer_set(&self->voice, value);  /* stores the pointer, no copy */
 }
 
-void
-espeak_say(Espeak *self, const gchar *text)
+// process ----------------------------------------------------------------------
+
+static void*
+process(void *data)     /* worker thread body; created by init() */
+{
+    pthread_mutex_lock(&process_lock);
+    /* the lock stays held from here on except inside pthread_cond_wait() */
+    for (;;)
+    {
+        while (process_queue == NULL)
+            pthread_cond_wait(&process_cond, &process_lock);
+
+        while (process_queue)
+        {
+            Econtext *context = (Econtext*)process_queue->data;
+            Espin *spin = context->process;
+            /* unlink the head node, i.e. this context's process_chunk */
+            process_queue = g_slist_remove_link(process_queue, process_queue);
+
+            synth(context, spin);
+
+            g_atomic_int_set(&spin->state, OUT);
+            spinning(context->queue, &context->process);
+
+            if (g_atomic_int_get(&context->process->state) == PROCESS)
+            {   /* next slot is waiting too: requeue the context at the tail */
+                GST_DEBUG("[%p]", context);
+                process_queue = g_slist_concat(process_queue,
+                        context->process_chunk);
+            }
+            else
+            {   /* nothing pending: clear INPROCESS so in_spinning() can requeue */
+                GST_DEBUG("[%p]", context);
+                g_atomic_int_set(&context->state,
+                        g_atomic_int_get(&context->state) & ~INPROCESS);
+            }
+        }
+        /* queue drained: wake espeak_out() callers waiting on process_cond */
+        pthread_cond_broadcast(&process_cond);
+    }
+    /* not reached: the loop above has no exit */
+    pthread_mutex_unlock(&process_lock);
+
+    return NULL;
+}
+
+static void
+process_push(Econtext *context)     /* queue the context for process() */
 {
-    spin_in(self->context, text);
+    pthread_mutex_lock(&process_lock);
+    process_queue = g_slist_concat(process_queue, context->process_chunk);
+    pthread_cond_broadcast(&process_cond);  /* wakes process() if it is waiting */
+    pthread_mutex_unlock(&process_lock);
 }
 
-gpointer
-espeak_hear(Espeak *self, gsize size)
+static void
+process_pop(Econtext *context)      /* drop the context from process_queue */
 {
-    return spin_out(self->context, &size);
+    pthread_mutex_lock(&process_lock);
+    process_queue = g_slist_remove_link(process_queue, context->process_chunk);
+    pthread_mutex_unlock(&process_lock);
+}
+
+// -----------------------------------------------------------------------------
+
+static void
+init()  /* one-time set-up: espeak library plus the process() thread */
+{
+    static volatile gsize initialized = 0;  /* g_once_init_enter() guard */
+    pthread_attr_t attr;
+
+    if (!g_once_init_enter(&initialized))
+        return;     /* set-up already ran (or finished in another thread) */
+    espeak_sample_rate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,
+            SYNC_BUFFER_SIZE, NULL, 0);
+    espeak_SetSynthCallback(synth_cb);
+    espeak_voices = espeak_ListVoices(NULL);
+    /* calls with side effects stay out of g_assert(): G_DISABLE_ASSERT drops it */
+    if (pthread_attr_init(&attr) != 0 ||
+        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0 ||
+        pthread_create(&process_tid, &attr, process, NULL) != 0)
+        g_error("espeak: cannot start the synthesis thread");
+    pthread_attr_destroy(&attr);
+    g_once_init_leave(&initialized, 1);
+}