From 23ffd37dded3bf872e42d7a00727ab3c4d105a97 Mon Sep 17 00:00:00 2001
From: Marvin W <git@larma.de>
Date: Sat, 1 May 2021 15:19:05 +0200
Subject: Echo Cancellation

---
 plugins/rtp/src/voice_processor.vala | 176 +++++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)
 create mode 100644 plugins/rtp/src/voice_processor.vala

(limited to 'plugins/rtp/src/voice_processor.vala')

diff --git a/plugins/rtp/src/voice_processor.vala b/plugins/rtp/src/voice_processor.vala
new file mode 100644
index 00000000..e6dc7e8f
--- /dev/null
+++ b/plugins/rtp/src/voice_processor.vala
@@ -0,0 +1,176 @@
+using Gst;
+
+namespace Dino.Plugins.Rtp {
+public static extern Buffer adjust_to_running_time(Base.Transform transform, Buffer buf); // Implemented natively as dino_plugins_rtp_adjust_to_running_time() in voice_processor_native.cpp.
+}
+
+public class Dino.Plugins.Rtp.EchoProbe : Audio.Filter { // Passthrough filter that re-chunks the audio it sees into 10 ms periods and hands each period to on_new_buffer listeners.
+    private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+    private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+    public Audio.Info audio_info { get; private set; } // Format handed to the most recent setup() call.
+    public signal void on_new_buffer(Buffer buffer); // Emitted once per complete period with a buffer of period_size bytes.
+    private uint period_samples; // Samples per period (rate / 100).
+    private uint period_size; // Bytes per period (period_samples * bytes per frame).
+    private Base.Adapter adapter = new Base.Adapter(); // Accumulates incoming audio until a whole period is available.
+
+    static construct {
+        add_static_pad_template(sink_template);
+        add_static_pad_template(src_template);
+        set_static_metadata("Acoustic Echo Canceller probe", "Generic/Audio", "Gathers playback buffers for echo cancellation", "Dino Team <contact@dino.im>");
+    }
+
+    construct {
+        set_passthrough(true); // The probe only observes the stream, so it runs in passthrough mode.
+    }
+
+    public override bool setup(Audio.Info info) { // Caches the stream format and derives the period length from it; always returns true.
+        audio_info = info;
+        period_samples = info.rate / 100; // 10ms buffers
+        period_size = period_samples * info.bpf;
+        return true;
+    }
+
+    // Queues the buffer returned by adjust_to_running_time() and emits on_new_buffer for every complete period; always returns FlowReturn.OK.
+    public override FlowReturn transform_ip(Buffer buf) {
+        lock (adapter) {
+            adapter.push(adjust_to_running_time(this, buf));
+            while (adapter.available() >= period_size) { // ">=" (not ">"): a period is complete as soon as exactly period_size bytes are queued, matching VoiceProcessor.generate_output.
+                on_new_buffer(adapter.take_buffer(period_size));
+            }
+        }
+        return FlowReturn.OK;
+    }
+
+    public override bool stop() { // Drops any partially collected period; always returns true.
+        adapter.clear();
+        return true;
+    }
+}
+
+public class Dino.Plugins.Rtp.VoiceProcessor : Audio.Filter { // Audio filter that runs its input through the native voice-processing backend in 10 ms periods, optionally fed by an EchoProbe and driving a StreamVolume.
+    private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+    private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}};
+    public Audio.Info audio_info { get; private set; } // Format handed to the most recent setup() call.
+    private ulong process_outgoing_buffer_handler_id; // Handler id on echo_probe.on_new_buffer; 0 while not connected.
+    private uint adjust_delay_timeout_id; // GLib timeout source id for adjust_delay(); 0 while none is scheduled.
+    private uint period_samples; // Samples per period (rate / 100).
+    private uint period_size; // Bytes per period (period_samples * bytes per frame).
+    private Base.Adapter adapter = new Base.Adapter(); // Buffers input until a whole period can be processed.
+    private EchoProbe? echo_probe; // Optional source of reverse-stream audio for the native processor.
+    private Audio.StreamVolume? stream_volume; // Optional volume control that is read and adjusted around each processed period.
+    private ClockTime last_reverse; // PTS of the last probe buffer that carried a timestamp; only written, never read, in this class.
+    private void* native; // Opaque handle from init_native(); reset to null by stop().
+
+    static construct {
+        add_static_pad_template(sink_template);
+        add_static_pad_template(src_template);
+        set_static_metadata("Voice Processor (AGC, AEC, filters, etc.)", "Generic/Audio", "Pre-processes voice with WebRTC Audio Processing Library", "Dino Team <contact@dino.im>");
+    }
+
+    construct {
+        set_passthrough(false); // Periods are processed in place by process_stream(), so passthrough is off.
+    }
+
+    public VoiceProcessor(EchoProbe? echo_probe = null, Audio.StreamVolume? stream_volume = null) { // Both collaborators are optional; start() tries to find a StreamVolume itself when none is given.
+        this.echo_probe = echo_probe;
+        this.stream_volume = stream_volume;
+    }
+
+    private static extern void* init_native(int stream_delay); // This and the externs below are implemented in voice_processor_native.cpp.
+    private static extern void setup_native(void* native);
+    private static extern void destroy_native(void* native);
+    private static extern void analyze_reverse_stream(void* native, Audio.Info info, Buffer buffer);
+    private static extern void process_stream(void* native, Audio.Info info, Buffer buffer);
+    private static extern void adjust_stream_delay(void* native);
+    private static extern void notify_gain_level(void* native, int gain_level);
+    private static extern int get_suggested_gain_level(void* native);
+    private static extern bool get_stream_has_voice(void* native);
+
+    public override bool setup(Audio.Info info) { // Caches the stream format, discards queued audio and calls setup_native(); always returns true.
+        debug("VoiceProcessor.setup(%s)", info.to_caps().to_string());
+        audio_info = info;
+        period_samples = info.rate / 100; // 10ms buffers
+        period_size = period_samples * info.bpf;
+        adapter.clear();
+        setup_native(native);
+        return true;
+    }
+
+    public override bool start() { // Creates the native processor and hooks up the optional collaborators; always returns true.
+        native = init_native(150); // 150 is the initial stream delay the native side later passes to set_stream_delay_ms().
+        if (process_outgoing_buffer_handler_id == 0 && echo_probe != null) {
+            process_outgoing_buffer_handler_id = echo_probe.on_new_buffer.connect(process_outgoing_buffer);
+        }
+        if (stream_volume == null && sinkpad.get_peer() != null && sinkpad.get_peer().get_parent_element() is Audio.StreamVolume) { // No volume control supplied: adopt the element linked to our sink pad if it implements StreamVolume.
+            stream_volume = sinkpad.get_peer().get_parent_element() as Audio.StreamVolume;
+        }
+        return true;
+    }
+
+    private bool adjust_delay() { // Timeout callback: re-tunes the native stream delay while the processor exists, otherwise unschedules itself.
+        if (native != null) {
+            adjust_stream_delay(native);
+            return Source.CONTINUE;
+        } else {
+            adjust_delay_timeout_id = 0;
+            return Source.REMOVE;
+        }
+    }
+
+    private void process_outgoing_buffer(Buffer buffer) { // Handler for echo_probe.on_new_buffer: feeds one probe period to the native processor and makes sure the delay timer is scheduled.
+        if (buffer.pts != uint64.MAX) { // uint64.MAX is GStreamer's "no timestamp" value (CLOCK_TIME_NONE).
+            last_reverse = buffer.pts;
+        }
+        analyze_reverse_stream(native, echo_probe.audio_info, buffer);
+        if (adjust_delay_timeout_id == 0 && echo_probe != null) {
+            adjust_delay_timeout_id = Timeout.add(5000, adjust_delay);
+        }
+    }
+
+    public override FlowReturn submit_input_buffer(bool is_discont, Buffer input) { // Queues input (dropping queued audio on a discontinuity); output is produced by generate_output().
+        lock (adapter) {
+            if (is_discont) {
+                adapter.clear();
+            }
+            adapter.push(adjust_to_running_time(this, input));
+        }
+        return FlowReturn.OK;
+    }
+
+    public override FlowReturn generate_output(out Buffer output_buffer) { // Processes at most one queued period per call and produces no buffer while less than a period is queued; always returns FlowReturn.OK.
+        lock (adapter) {
+            if (adapter.available() >= period_size) {
+                output_buffer = (Gst.Buffer) adapter.take_buffer(period_size).make_writable();
+                int old_gain_level = 0;
+                if (stream_volume != null) {
+                    old_gain_level = (int) (stream_volume.get_volume(Audio.StreamVolumeFormat.LINEAR) * 255.0 + 0.5); // +0.5 rounds to the nearest level; plain truncation can read a level set below back one step low.
+                    notify_gain_level(native, old_gain_level);
+                }
+                process_stream(native, audio_info, output_buffer);
+                if (stream_volume != null) {
+                    int new_gain_level = get_suggested_gain_level(native);
+                    if (old_gain_level != new_gain_level) { // Only touch the volume control when the native side suggests a different level.
+                        debug("Gain: %i -> %i", old_gain_level, new_gain_level);
+                        stream_volume.set_volume(Audio.StreamVolumeFormat.LINEAR, ((double)new_gain_level) / 255.0);
+                    }
+                }
+            }
+        }
+        return FlowReturn.OK;
+    }
+
+    public override bool stop() { // Undoes start(): detaches from the probe, cancels the delay timer, drops queued audio and frees the native processor; always returns true.
+        if (process_outgoing_buffer_handler_id != 0) {
+            echo_probe.disconnect(process_outgoing_buffer_handler_id);
+            process_outgoing_buffer_handler_id = 0;
+        }
+        if (adjust_delay_timeout_id != 0) {
+            Source.remove(adjust_delay_timeout_id);
+            adjust_delay_timeout_id = 0;
+        }
+        adapter.clear();
+        destroy_native(native);
+        native = null; // Lets adjust_delay() notice that the processor is gone.
+        return true;
+    }
+}
\ No newline at end of file
-- 
cgit v1.2.3


From 8044b546d0ac15d34a3e6499b9c0d55d3d8f9c94 Mon Sep 17 00:00:00 2001
From: Marvin W <git@larma.de>
Date: Sun, 2 May 2021 00:34:17 +0200
Subject: Support voice processing on GStreamer 1.14

---
 plugins/rtp/CMakeLists.txt                 |  7 +++---
 plugins/rtp/src/voice_processor.vala       |  2 +-
 plugins/rtp/src/voice_processor_native.cpp | 37 ++++++++++++++++++------------
 3 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'plugins/rtp/src/voice_processor.vala')

diff --git a/plugins/rtp/CMakeLists.txt b/plugins/rtp/CMakeLists.txt
index b19c8a8f..52419425 100644
--- a/plugins/rtp/CMakeLists.txt
+++ b/plugins/rtp/CMakeLists.txt
@@ -17,7 +17,7 @@ if(Gst_VERSION VERSION_GREATER "1.16")
 endif()
 
 if(WebRTCAudioProcessing_VERSION GREATER "0.4")
-    message(WARNING "Ignoring WebRTCAudioProcessing, only versions < 0.4 supported so far")
+    message(STATUS "Ignoring WebRTCAudioProcessing, only versions < 0.4 supported so far")
     unset(WebRTCAudioProcessing_FOUND)
 endif()
 
@@ -25,8 +25,9 @@ if(WebRTCAudioProcessing_FOUND)
     set(RTP_DEFINITIONS ${RTP_DEFINITIONS} WITH_VOICE_PROCESSOR)
     set(RTP_VOICE_PROCESSOR_VALA src/voice_processor.vala)
     set(RTP_VOICE_PROCESSOR_CXX src/voice_processor_native.cpp)
+    set(RTP_VOICE_PROCESSOR_LIB webrtc-audio-processing)
 else()
-    message(WARNING "WebRTCAudioProcessing not found, build without voice pre-processing!")
+    message(STATUS "WebRTCAudioProcessing not found, build without voice pre-processing!")
 endif()
 
 vala_precompile(RTP_VALA_C
@@ -53,7 +54,7 @@ DEFINITIONS
 
 add_definitions(${VALA_CFLAGS} -DG_LOG_DOMAIN="rtp" -I${CMAKE_CURRENT_SOURCE_DIR}/src)
 add_library(rtp SHARED ${RTP_VALA_C} ${RTP_VOICE_PROCESSOR_CXX})
-target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0 webrtc-audio-processing)
+target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0 ${RTP_VOICE_PROCESSOR_LIB})
 set_target_properties(rtp PROPERTIES PREFIX "")
 set_target_properties(rtp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/plugins/)
 
diff --git a/plugins/rtp/src/voice_processor.vala b/plugins/rtp/src/voice_processor.vala
index e6dc7e8f..66e95d72 100644
--- a/plugins/rtp/src/voice_processor.vala
+++ b/plugins/rtp/src/voice_processor.vala
@@ -123,7 +123,7 @@ public class Dino.Plugins.Rtp.VoiceProcessor : Audio.Filter {
         }
         analyze_reverse_stream(native, echo_probe.audio_info, buffer);
         if (adjust_delay_timeout_id == 0 && echo_probe != null) {
-            adjust_delay_timeout_id = Timeout.add(5000, adjust_delay);
+            adjust_delay_timeout_id = Timeout.add(1000, adjust_delay);
         }
     }
 
diff --git a/plugins/rtp/src/voice_processor_native.cpp b/plugins/rtp/src/voice_processor_native.cpp
index 00f719e1..8a052cf8 100644
--- a/plugins/rtp/src/voice_processor_native.cpp
+++ b/plugins/rtp/src/voice_processor_native.cpp
@@ -11,6 +11,8 @@
 struct _DinoPluginsRtpVoiceProcessorNative {
     webrtc::AudioProcessing *apm;
     gint stream_delay;
+    gint last_median;
+    gint last_poor_delays;
 };
 
 extern "C" void *dino_plugins_rtp_adjust_to_running_time(GstBaseTransform *transform, GstBuffer *buffer) {
@@ -26,6 +28,8 @@ extern "C" void *dino_plugins_rtp_voice_processor_init_native(gint stream_delay)
     config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, 85));
     native->apm = webrtc::AudioProcessing::Create(config);
     native->stream_delay = stream_delay;
+    native->last_median = 0;
+    native->last_poor_delays = 0;
     return native;
 }
 
@@ -65,19 +69,19 @@ dino_plugins_rtp_voice_processor_analyze_reverse_stream(void *native_ptr, GstAud
     webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
     webrtc::AudioProcessing *apm = native->apm;
 
-    GstAudioBuffer audio_buffer;
-    gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READ);
+    GstMapInfo map;
+    gst_buffer_map(buffer, &map, GST_MAP_READ);
 
     webrtc::AudioFrame frame;
     frame.num_channels_ = info->channels;
     frame.sample_rate_hz_ = info->rate;
     frame.samples_per_channel_ = gst_buffer_get_size(buffer) / info->bpf;
-    memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+    memcpy(frame.data_, map.data, frame.samples_per_channel_ * info->bpf);
 
     int err = apm->AnalyzeReverseStream(&frame);
     if (err < 0) g_warning("voice_processor_native.cpp: ProcessReverseStream %i", err);
 
-    gst_audio_buffer_unmap(&audio_buffer);
+    gst_buffer_unmap(buffer, &map);
 }
 
 extern "C" void dino_plugins_rtp_voice_processor_notify_gain_level(void *native_ptr, gint gain_level) {
@@ -101,14 +105,17 @@ extern "C" bool dino_plugins_rtp_voice_processor_get_stream_has_voice(void *nati
 extern "C" void dino_plugins_rtp_voice_processor_adjust_stream_delay(void *native_ptr) {
     _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr;
     webrtc::AudioProcessing *apm = native->apm;
-    int median, std;
+    int median, std, poor_delays;
     float fraction_poor_delays;
     apm->echo_cancellation()->GetDelayMetrics(&median, &std, &fraction_poor_delays);
-    if (fraction_poor_delays < 0) return;
-    g_debug("voice_processor_native.cpp: Stream delay metrics: %i %i %f", median, std, fraction_poor_delays);
-    if (fraction_poor_delays > 0.5) {
-        native->stream_delay = std::max(0, native->stream_delay + std::min(-10, std::max(median, 10)));
-        g_debug("voice_processor_native.cpp: Adjusted stream delay %i", native->stream_delay);
+    poor_delays = (int)(fraction_poor_delays * 100.0);
+    if (fraction_poor_delays < 0 || (native->last_median == median && native->last_poor_delays == poor_delays)) return;
+    g_debug("voice_processor_native.cpp: Stream delay metrics: median=%i std=%i poor_delays=%i%%", median, std, poor_delays);
+    native->last_median = median;
+    native->last_poor_delays = poor_delays;
+    if (poor_delays > 90) {
+        native->stream_delay = std::min(std::max(0, native->stream_delay + std::min(48, std::max(median, -48))), 384);
+        g_debug("voice_processor_native.cpp: set stream_delay=%i", native->stream_delay);
     }
 }
 
@@ -118,21 +125,21 @@ dino_plugins_rtp_voice_processor_process_stream(void *native_ptr, GstAudioInfo *
     webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false);
     webrtc::AudioProcessing *apm = native->apm;
 
-    GstAudioBuffer audio_buffer;
-    gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READWRITE);
+    GstMapInfo map;
+    gst_buffer_map(buffer, &map, GST_MAP_READWRITE);
 
     webrtc::AudioFrame frame;
     frame.num_channels_ = info->channels;
     frame.sample_rate_hz_ = info->rate;
     frame.samples_per_channel_ = info->rate / 100;
-    memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf);
+    memcpy(frame.data_, map.data, frame.samples_per_channel_ * info->bpf);
 
     apm->set_stream_delay_ms(native->stream_delay);
     int err = apm->ProcessStream(&frame);
-    if (err >= 0) memcpy(audio_buffer.planes[0], frame.data_, frame.samples_per_channel_ * info->bpf);
+    if (err >= 0) memcpy(map.data, frame.data_, frame.samples_per_channel_ * info->bpf);
     if (err < 0) g_warning("voice_processor_native.cpp: ProcessStream %i", err);
 
-    gst_audio_buffer_unmap(&audio_buffer);
+    gst_buffer_unmap(buffer, &map);
 }
 
 extern "C" void dino_plugins_rtp_voice_processor_destroy_native(void *native_ptr) {
-- 
cgit v1.2.3