From 23ffd37dded3bf872e42d7a00727ab3c4d105a97 Mon Sep 17 00:00:00 2001 From: Marvin W Date: Sat, 1 May 2021 15:19:05 +0200 Subject: Echo Cancellation --- plugins/rtp/src/voice_processor.vala | 176 +++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 plugins/rtp/src/voice_processor.vala (limited to 'plugins/rtp/src/voice_processor.vala') diff --git a/plugins/rtp/src/voice_processor.vala b/plugins/rtp/src/voice_processor.vala new file mode 100644 index 00000000..e6dc7e8f --- /dev/null +++ b/plugins/rtp/src/voice_processor.vala @@ -0,0 +1,176 @@ +using Gst; + +namespace Dino.Plugins.Rtp { +public static extern Buffer adjust_to_running_time(Base.Transform transform, Buffer buf); +} + +public class Dino.Plugins.Rtp.EchoProbe : Audio.Filter { + private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}}; + private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}}; + public Audio.Info audio_info { get; private set; } + public signal void on_new_buffer(Buffer buffer); + private uint period_samples; + private uint period_size; + private Base.Adapter adapter = new Base.Adapter(); + + static construct { + add_static_pad_template(sink_template); + add_static_pad_template(src_template); + set_static_metadata("Acoustic Echo Canceller probe", "Generic/Audio", "Gathers playback buffers for echo cancellation", "Dino Team "); + } + + construct { + set_passthrough(true); + } + + public override bool setup(Audio.Info info) { + audio_info = info; + period_samples = info.rate / 100; // 10ms buffers + period_size = period_samples * info.bpf; + return true; + } + + + public override FlowReturn transform_ip(Buffer buf) { + lock (adapter) { + adapter.push(adjust_to_running_time(this, buf)); + while (adapter.available() > period_size) { + on_new_buffer(adapter.take_buffer(period_size)); + } + } + return FlowReturn.OK; + } + + public override bool stop() { + adapter.clear(); + return true; + } +} + +public class Dino.Plugins.Rtp.VoiceProcessor : Audio.Filter { + private static StaticPadTemplate sink_template = {"sink", PadDirection.SINK, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}}; + private static StaticPadTemplate src_template = {"src", PadDirection.SRC, PadPresence.ALWAYS, {null, "audio/x-raw,rate=48000,channels=1,layout=interleaved,format=S16LE"}}; + public Audio.Info audio_info { get; private set; } + private ulong process_outgoing_buffer_handler_id; + private uint adjust_delay_timeout_id; + private uint period_samples; + private uint period_size; + private Base.Adapter adapter = new Base.Adapter(); + private EchoProbe? echo_probe; + private Audio.StreamVolume? stream_volume; + private ClockTime last_reverse; + private void* native; + + static construct { + add_static_pad_template(sink_template); + add_static_pad_template(src_template); + set_static_metadata("Voice Processor (AGC, AEC, filters, etc.)", "Generic/Audio", "Pre-processes voice with WebRTC Audio Processing Library", "Dino Team "); + } + + construct { + set_passthrough(false); + } + + public VoiceProcessor(EchoProbe? echo_probe = null, Audio.StreamVolume? stream_volume = null) { + this.echo_probe = echo_probe; + this.stream_volume = stream_volume; + } + + private static extern void* init_native(int stream_delay); + private static extern void setup_native(void* native); + private static extern void destroy_native(void* native); + private static extern void analyze_reverse_stream(void* native, Audio.Info info, Buffer buffer); + private static extern void process_stream(void* native, Audio.Info info, Buffer buffer); + private static extern void adjust_stream_delay(void* native); + private static extern void notify_gain_level(void* native, int gain_level); + private static extern int get_suggested_gain_level(void* native); + private static extern bool get_stream_has_voice(void* native); + + public override bool setup(Audio.Info info) { + debug("VoiceProcessor.setup(%s)", info.to_caps().to_string()); + audio_info = info; + period_samples = info.rate / 100; // 10ms buffers + period_size = period_samples * info.bpf; + adapter.clear(); + setup_native(native); + return true; + } + + public override bool start() { + native = init_native(150); + if (process_outgoing_buffer_handler_id == 0 && echo_probe != null) { + process_outgoing_buffer_handler_id = echo_probe.on_new_buffer.connect(process_outgoing_buffer); + } + if (stream_volume == null && sinkpad.get_peer() != null && sinkpad.get_peer().get_parent_element() is Audio.StreamVolume) { + stream_volume = sinkpad.get_peer().get_parent_element() as Audio.StreamVolume; + } + return true; + } + + private bool adjust_delay() { + if (native != null) { + adjust_stream_delay(native); + return Source.CONTINUE; + } else { + adjust_delay_timeout_id = 0; + return Source.REMOVE; + } + } + + private void process_outgoing_buffer(Buffer buffer) { + if (buffer.pts != uint64.MAX) { + last_reverse = buffer.pts; + } + analyze_reverse_stream(native, echo_probe.audio_info, buffer); + if (adjust_delay_timeout_id == 0 && echo_probe != null) { + adjust_delay_timeout_id = Timeout.add(5000, adjust_delay); + } + } + + public override FlowReturn submit_input_buffer(bool is_discont, Buffer input) { + lock (adapter) { + if (is_discont) { + adapter.clear(); + } + adapter.push(adjust_to_running_time(this, input)); + } + return FlowReturn.OK; + } + + public override FlowReturn generate_output(out Buffer output_buffer) { + lock (adapter) { + if (adapter.available() >= period_size) { + output_buffer = (Gst.Buffer) adapter.take_buffer(period_size).make_writable(); + int old_gain_level = 0; + if (stream_volume != null) { + old_gain_level = (int) (stream_volume.get_volume(Audio.StreamVolumeFormat.LINEAR) * 255.0); + notify_gain_level(native, old_gain_level); + } + process_stream(native, audio_info, output_buffer); + if (stream_volume != null) { + int new_gain_level = get_suggested_gain_level(native); + if (old_gain_level != new_gain_level) { + debug("Gain: %i -> %i", old_gain_level, new_gain_level); + stream_volume.set_volume(Audio.StreamVolumeFormat.LINEAR, ((double)new_gain_level) / 255.0); + } + } + } + } + return FlowReturn.OK; + } + + public override bool stop() { + if (process_outgoing_buffer_handler_id != 0) { + echo_probe.disconnect(process_outgoing_buffer_handler_id); + process_outgoing_buffer_handler_id = 0; + } + if (adjust_delay_timeout_id != 0) { + Source.remove(adjust_delay_timeout_id); + adjust_delay_timeout_id = 0; + } + adapter.clear(); + destroy_native(native); + native = null; + return true; + } +} \ No newline at end of file -- cgit v1.2.3 From 8044b546d0ac15d34a3e6499b9c0d55d3d8f9c94 Mon Sep 17 00:00:00 2001 From: Marvin W Date: Sun, 2 May 2021 00:34:17 +0200 Subject: Support voice processing on GStreamer 0.14 --- plugins/rtp/CMakeLists.txt | 7 +++--- plugins/rtp/src/voice_processor.vala | 2 +- plugins/rtp/src/voice_processor_native.cpp | 37 ++++++++++++++++++------------ 3 files changed, 27 insertions(+), 19 deletions(-) (limited to 'plugins/rtp/src/voice_processor.vala') diff --git a/plugins/rtp/CMakeLists.txt b/plugins/rtp/CMakeLists.txt index b19c8a8f..52419425 100644 --- a/plugins/rtp/CMakeLists.txt +++ b/plugins/rtp/CMakeLists.txt @@ -17,7 +17,7 @@ if(Gst_VERSION VERSION_GREATER "1.16") endif() if(WebRTCAudioProcessing_VERSION GREATER "0.4") - message(WARNING "Ignoring WebRTCAudioProcessing, only versions < 0.4 supported so far") + message(STATUS "Ignoring WebRTCAudioProcessing, only versions < 0.4 supported so far") unset(WebRTCAudioProcessing_FOUND) endif() @@ -25,8 +25,9 @@ if(WebRTCAudioProcessing_FOUND) set(RTP_DEFINITIONS ${RTP_DEFINITIONS} WITH_VOICE_PROCESSOR) set(RTP_VOICE_PROCESSOR_VALA src/voice_processor.vala) set(RTP_VOICE_PROCESSOR_CXX src/voice_processor_native.cpp) + set(RTP_VOICE_PROCESSOR_LIB webrtc-audio-processing) else() - message(WARNING "WebRTCAudioProcessing not found, build without voice pre-processing!") + message(STATUS "WebRTCAudioProcessing not found, build without voice pre-processing!") endif() vala_precompile(RTP_VALA_C @@ -53,7 +54,7 @@ DEFINITIONS add_definitions(${VALA_CFLAGS} -DG_LOG_DOMAIN="rtp" -I${CMAKE_CURRENT_SOURCE_DIR}/src) add_library(rtp SHARED ${RTP_VALA_C} ${RTP_VOICE_PROCESSOR_CXX}) -target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0 webrtc-audio-processing) +target_link_libraries(rtp libdino crypto-vala ${RTP_PACKAGES} gstreamer-rtp-1.0 ${RTP_VOICE_PROCESSOR_LIB}) set_target_properties(rtp PROPERTIES PREFIX "") set_target_properties(rtp PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/plugins/) diff --git a/plugins/rtp/src/voice_processor.vala b/plugins/rtp/src/voice_processor.vala index e6dc7e8f..66e95d72 100644 --- a/plugins/rtp/src/voice_processor.vala +++ b/plugins/rtp/src/voice_processor.vala @@ -123,7 +123,7 @@ public class Dino.Plugins.Rtp.VoiceProcessor : Audio.Filter { } analyze_reverse_stream(native, echo_probe.audio_info, buffer); if (adjust_delay_timeout_id == 0 && echo_probe != null) { - adjust_delay_timeout_id = Timeout.add(5000, adjust_delay); + adjust_delay_timeout_id = Timeout.add(1000, adjust_delay); } } diff --git a/plugins/rtp/src/voice_processor_native.cpp b/plugins/rtp/src/voice_processor_native.cpp index 00f719e1..8a052cf8 100644 --- a/plugins/rtp/src/voice_processor_native.cpp +++ b/plugins/rtp/src/voice_processor_native.cpp @@ -11,6 +11,8 @@ struct _DinoPluginsRtpVoiceProcessorNative { webrtc::AudioProcessing *apm; gint stream_delay; + gint last_median; + gint last_poor_delays; }; extern "C" void *dino_plugins_rtp_adjust_to_running_time(GstBaseTransform *transform, GstBuffer *buffer) { @@ -26,6 +28,8 @@ extern "C" void *dino_plugins_rtp_voice_processor_init_native(gint stream_delay) config.Set(new webrtc::ExperimentalAgc(true, 85)); native->apm = webrtc::AudioProcessing::Create(config); native->stream_delay = stream_delay; + native->last_median = 0; + native->last_poor_delays = 0; return native; } @@ -65,19 +69,19 @@ dino_plugins_rtp_voice_processor_analyze_reverse_stream(void *native_ptr, GstAud webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false); webrtc::AudioProcessing *apm = native->apm; - GstAudioBuffer audio_buffer; - gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READ); + GstMapInfo map; + gst_buffer_map(buffer, &map, GST_MAP_READ); webrtc::AudioFrame frame; frame.num_channels_ = info->channels; frame.sample_rate_hz_ = info->rate; frame.samples_per_channel_ = gst_buffer_get_size(buffer) / info->bpf; - memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf); + memcpy(frame.data_, map.data, frame.samples_per_channel_ * info->bpf); int err = apm->AnalyzeReverseStream(&frame); if (err < 0) g_warning("voice_processor_native.cpp: ProcessReverseStream %i", err); - gst_audio_buffer_unmap(&audio_buffer); + gst_buffer_unmap(buffer, &map); } extern "C" void dino_plugins_rtp_voice_processor_notify_gain_level(void *native_ptr, gint gain_level) { @@ -101,14 +105,17 @@ extern "C" bool dino_plugins_rtp_voice_processor_get_stream_has_voice(void *nati extern "C" void dino_plugins_rtp_voice_processor_adjust_stream_delay(void *native_ptr) { _DinoPluginsRtpVoiceProcessorNative *native = (_DinoPluginsRtpVoiceProcessorNative *) native_ptr; webrtc::AudioProcessing *apm = native->apm; - int median, std; + int median, std, poor_delays; float fraction_poor_delays; apm->echo_cancellation()->GetDelayMetrics(&median, &std, &fraction_poor_delays); - if (fraction_poor_delays < 0) return; - g_debug("voice_processor_native.cpp: Stream delay metrics: %i %i %f", median, std, fraction_poor_delays); - if (fraction_poor_delays > 0.5) { - native->stream_delay = std::max(0, native->stream_delay + std::min(-10, std::max(median, 10))); - g_debug("voice_processor_native.cpp: Adjusted stream delay %i", native->stream_delay); + poor_delays = (int)(fraction_poor_delays * 100.0); + if (fraction_poor_delays < 0 || (native->last_median == median && native->last_poor_delays == poor_delays)) return; + g_debug("voice_processor_native.cpp: Stream delay metrics: median=%i std=%i poor_delays=%i%%", median, std, poor_delays); + native->last_median = median; + native->last_poor_delays = poor_delays; + if (poor_delays > 90) { + native->stream_delay = std::min(std::max(0, native->stream_delay + std::min(48, std::max(median, -48))), 384); + g_debug("voice_processor_native.cpp: set stream_delay=%i", native->stream_delay); } } @@ -118,21 +125,21 @@ dino_plugins_rtp_voice_processor_process_stream(void *native_ptr, GstAudioInfo * webrtc::StreamConfig config(SAMPLE_RATE, SAMPLE_CHANNELS, false); webrtc::AudioProcessing *apm = native->apm; - GstAudioBuffer audio_buffer; - gst_audio_buffer_map(&audio_buffer, info, buffer, GST_MAP_READWRITE); + GstMapInfo map; + gst_buffer_map(buffer, &map, GST_MAP_READWRITE); webrtc::AudioFrame frame; frame.num_channels_ = info->channels; frame.sample_rate_hz_ = info->rate; frame.samples_per_channel_ = info->rate / 100; - memcpy(frame.data_, audio_buffer.planes[0], frame.samples_per_channel_ * info->bpf); + memcpy(frame.data_, map.data, frame.samples_per_channel_ * info->bpf); apm->set_stream_delay_ms(native->stream_delay); int err = apm->ProcessStream(&frame); - if (err >= 0) memcpy(audio_buffer.planes[0], frame.data_, frame.samples_per_channel_ * info->bpf); + if (err >= 0) memcpy(map.data, frame.data_, frame.samples_per_channel_ * info->bpf); if (err < 0) g_warning("voice_processor_native.cpp: ProcessStream %i", err); - gst_audio_buffer_unmap(&audio_buffer); + gst_buffer_unmap(buffer, &map); } extern "C" void dino_plugins_rtp_voice_processor_destroy_native(void *native_ptr) { -- cgit v1.2.3