/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "audio_processing_impl.h"

#include <assert.h>

#include "audio_buffer.h"
#include "critical_section_wrapper.h"
#include "echo_cancellation_impl.h"
#include "echo_control_mobile_impl.h"
#include "file_wrapper.h"
#include "high_pass_filter_impl.h"
#include "gain_control_impl.h"
#include "level_estimator_impl.h"
#include "module_common_types.h"
#include "noise_suppression_impl.h"
#include "processing_component.h"
#include "splitting_filter.h"
#include "voice_detection_impl.h"

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// Files generated at build-time by the protobuf compiler.
#ifdef WEBRTC_ANDROID
#include "external/webrtc/src/modules/audio_processing/debug.pb.h"
#else
#include "webrtc/audio_processing/debug.pb.h"
#endif
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

namespace webrtc {

AudioProcessing* AudioProcessing::Create(int id) {
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall, webrtc::kTraceAudioProcessing, id,
               "AudioProcessing::Create()");*/

  AudioProcessingImpl* apm = new AudioProcessingImpl(id);
  if (apm->Initialize() != kNoError) {
    delete apm;
    apm = NULL;
  }

  return apm;
}

void AudioProcessing::Destroy(AudioProcessing* apm) {
  delete static_cast<AudioProcessingImpl*>(apm);
}

AudioProcessingImpl::AudioProcessingImpl(int id)
    : id_(id),
      echo_cancellation_(NULL),
      echo_control_mobile_(NULL),
      gain_control_(NULL),
      high_pass_filter_(NULL),
      level_estimator_(NULL),
      noise_suppression_(NULL),
      voice_detection_(NULL),
      crit_(CriticalSectionWrapper::CreateCriticalSection()),
      render_audio_(NULL),
      capture_audio_(NULL),
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
      debug_file_(FileWrapper::Create()),
      event_msg_(new audioproc::Event()),
#endif
      sample_rate_hz_(kSampleRate16kHz),
      split_sample_rate_hz_(kSampleRate16kHz),
      samples_per_channel_(sample_rate_hz_ / 100),
      stream_delay_ms_(0),
      was_stream_delay_set_(false),
      num_reverse_channels_(1),
      num_input_channels_(1),
      num_output_channels_(1) {
  echo_cancellation_ = new EchoCancellationImpl(this);
  component_list_.push_back(echo_cancellation_);

  echo_control_mobile_ = new EchoControlMobileImpl(this);
  component_list_.push_back(echo_control_mobile_);

  gain_control_ = new GainControlImpl(this);
  component_list_.push_back(gain_control_);

  high_pass_filter_ = new HighPassFilterImpl(this);
  component_list_.push_back(high_pass_filter_);

  level_estimator_ = new LevelEstimatorImpl(this);
  component_list_.push_back(level_estimator_);

  noise_suppression_ = new NoiseSuppressionImpl(this);
  component_list_.push_back(noise_suppression_);

  voice_detection_ = new VoiceDetectionImpl(this);
  component_list_.push_back(voice_detection_);
}

AudioProcessingImpl::~AudioProcessingImpl() {
  while (!component_list_.empty()) {
    ProcessingComponent* component = component_list_.front();
    component->Destroy();
    delete component;
    component_list_.pop_front();
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    debug_file_->CloseFile();
  }
#endif

  delete crit_;
  crit_ = NULL;

  if (render_audio_) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }
}

CriticalSectionWrapper* AudioProcessingImpl::crit() const {
  return crit_;
}

int AudioProcessingImpl::split_sample_rate_hz() const {
  return split_sample_rate_hz_;
}
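
// Usage sketch (illustrative, not part of the original file): creating and
// destroying an APM instance through the public factory functions defined
// above. Create() runs Initialize() internally and returns NULL on failure;
// instances should be released through Destroy() rather than deleted
// directly, since it downcasts to the implementation type before deleting.
//
//   webrtc::AudioProcessing* apm = webrtc::AudioProcessing::Create(0);
//   if (apm != NULL) {
//     // ... configure and run the streams ...
//     webrtc::AudioProcessing::Destroy(apm);
//   }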
int AudioProcessingImpl::Initialize() {
  CriticalSectionScoped crit_scoped(*crit_);
  return InitializeLocked();
}

int AudioProcessingImpl::InitializeLocked() {
  if (render_audio_ != NULL) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_ != NULL) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }

  render_audio_ = new AudioBuffer(num_reverse_channels_,
                                  samples_per_channel_);
  capture_audio_ = new AudioBuffer(num_input_channels_,
                                   samples_per_channel_);

  was_stream_delay_set_ = false;

  // Initialize all components.
  std::list<ProcessingComponent*>::iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    int err = (*it)->Initialize();
    if (err != kNoError) {
      return err;
    }
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    int err = WriteInitMessage();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::set_sample_rate_hz(int rate) {
  CriticalSectionScoped crit_scoped(*crit_);
  if (rate != kSampleRate8kHz &&
      rate != kSampleRate16kHz &&
      rate != kSampleRate32kHz) {
    return kBadParameterError;
  }

  sample_rate_hz_ = rate;
  samples_per_channel_ = rate / 100;

  if (sample_rate_hz_ == kSampleRate32kHz) {
    split_sample_rate_hz_ = kSampleRate16kHz;
  } else {
    split_sample_rate_hz_ = sample_rate_hz_;
  }

  return InitializeLocked();
}

int AudioProcessingImpl::sample_rate_hz() const {
  return sample_rate_hz_;
}

int AudioProcessingImpl::set_num_reverse_channels(int channels) {
  CriticalSectionScoped crit_scoped(*crit_);
  // Only stereo supported currently.
  if (channels > 2 || channels < 1) {
    return kBadParameterError;
  }

  num_reverse_channels_ = channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_reverse_channels() const {
  return num_reverse_channels_;
}

int AudioProcessingImpl::set_num_channels(
    int input_channels,
    int output_channels) {
  CriticalSectionScoped crit_scoped(*crit_);
  if (output_channels > input_channels) {
    return kBadParameterError;
  }

  // Only stereo supported currently.
  if (input_channels > 2 || input_channels < 1) {
    return kBadParameterError;
  }

  if (output_channels > 2 || output_channels < 1) {
    return kBadParameterError;
  }

  num_input_channels_ = input_channels;
  num_output_channels_ = output_channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_input_channels() const {
  return num_input_channels_;
}

int AudioProcessingImpl::num_output_channels() const {
  return num_output_channels_;
}
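
// Configuration sketch (illustrative; the rate and channel counts are
// arbitrary examples). Each setter validates its arguments and then calls
// InitializeLocked(), so changing the stream format resets component state.
//
//   apm->set_sample_rate_hz(32000);    // Only 8000, 16000 or 32000 accepted.
//   apm->set_num_channels(2, 1);       // Stereo capture mixed down to mono.
//   apm->set_num_reverse_channels(1);  // Mono render (far-end) stream.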
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_input_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_input_data(frame->_payloadData, data_size);
    msg->set_delay(stream_delay_ms_);
    msg->set_drift(echo_cancellation_->stream_drift_samples());
    msg->set_level(gain_control_->stream_analog_level());
  }
#endif

  capture_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): experiment with mixing and AEC placement.
  if (num_output_channels_ < num_input_channels_) {
    capture_audio_->Mix(num_output_channels_);
    frame->_audioChannel = num_output_channels_;
  }

  bool data_changed = stream_data_changed();
  if (analysis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Split into a low and high band.
      SplittingFilterAnalysis(capture_audio_->data(i),
                              capture_audio_->low_pass_split_data(i),
                              capture_audio_->high_pass_split_data(i),
                              capture_audio_->analysis_filter_state1(i),
                              capture_audio_->analysis_filter_state2(i));
    }
  }

  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (echo_control_mobile_->is_enabled() &&
      noise_suppression_->is_enabled()) {
    capture_audio_->CopyLowPassToReference();
  }

  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (synthesis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Recombine low and high bands.
      SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
                               capture_audio_->high_pass_split_data(i),
                               capture_audio_->data(i),
                               capture_audio_->synthesis_filter_state1(i),
                               capture_audio_->synthesis_filter_state2(i));
    }
  }

  // The level estimator operates on the recombined data.
  err = level_estimator_->ProcessStream(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  capture_audio_->InterleaveTo(frame, data_changed);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_output_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  was_stream_delay_set_ = false;
  return kNoError;
}
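
// Per-frame capture-side sketch (illustrative; analog_level, delay_ms and
// drift are assumed to come from the audio device layer, and the setter
// names are from the component interfaces, not this file). ProcessStream()
// reads the stream delay, analog level and drift set before the call, so
// the expected pattern for each 10 ms frame is roughly:
//
//   apm->set_stream_delay_ms(delay_ms);
//   apm->gain_control()->set_stream_analog_level(analog_level);
//   apm->echo_cancellation()->set_stream_drift_samples(drift);
//   int err = apm->ProcessStream(frame);
//   analog_level = apm->gain_control()->stream_analog_level();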
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_reverse_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  render_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): turn the splitting filter into a component?
  if (sample_rate_hz_ == kSampleRate32kHz) {
    for (int i = 0; i < num_reverse_channels_; i++) {
      // Split into low and high band.
      SplittingFilterAnalysis(render_audio_->data(i),
                              render_audio_->low_pass_split_data(i),
                              render_audio_->high_pass_split_data(i),
                              render_audio_->analysis_filter_state1(i),
                              render_audio_->analysis_filter_state2(i));
    }
  }

  // TODO(ajm): warnings possible from components?
  err = echo_cancellation_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  return err;  // TODO(ajm): this is for returning warnings; necessary?
}

int AudioProcessingImpl::set_stream_delay_ms(int delay) {
  was_stream_delay_set_ = true;
  if (delay < 0) {
    return kBadParameterError;
  }

  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
  if (delay > 500) {
    stream_delay_ms_ = 500;
    return kBadStreamParameterWarning;
  }

  stream_delay_ms_ = delay;
  return kNoError;
}

int AudioProcessingImpl::stream_delay_ms() const {
  return stream_delay_ms_;
}

bool AudioProcessingImpl::was_stream_delay_set() const {
  return was_stream_delay_set_;
}

int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  CriticalSectionScoped crit_scoped(*crit_);
  assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);

  if (filename == NULL) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_file_->OpenFile(filename, false) == -1) {
    debug_file_->CloseFile();
    return kFileError;
  }

  int err = WriteInitMessage();
  if (err != kNoError) {
    return err;
  }

  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

int AudioProcessingImpl::StopDebugRecording() {
  CriticalSectionScoped crit_scoped(*crit_);
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // We just return if recording hasn't started.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }

  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
  return echo_cancellation_;
}

EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
  return echo_control_mobile_;
}

GainControl* AudioProcessingImpl::gain_control() const {
  return gain_control_;
}

HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
  return high_pass_filter_;
}

LevelEstimator* AudioProcessingImpl::level_estimator() const {
  return level_estimator_;
}

NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
  return noise_suppression_;
}

VoiceDetection* AudioProcessingImpl::voice_detection() const {
  return voice_detection_;
}

WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
  CriticalSectionScoped crit_scoped(*crit_);
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall, webrtc::kTraceAudioProcessing, id_,
               "ChangeUniqueId(new id = %d)", id);*/
  id_ = id;

  return kNoError;
}
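
// Duplex ordering sketch (illustrative): each far-end frame should pass
// through AnalyzeReverseStream() before the matching near-end
// ProcessStream() call, so the echo-control components see the render
// audio first. Debug recording brackets the session; "apm.aecdump" is an
// arbitrary example filename, and StartDebugRecording() returns
// kUnsupportedFunctionError unless WEBRTC_AUDIOPROC_DEBUG_DUMP is defined.
//
//   apm->StartDebugRecording("apm.aecdump");
//   while (running) {
//     apm->AnalyzeReverseStream(render_frame);
//     apm->ProcessStream(capture_frame);
//   }
//   apm->StopDebugRecording();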
bool AudioProcessingImpl::stream_data_changed() const {
  int enabled_count = 0;
  std::list<ProcessingComponent*>::const_iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    if ((*it)->is_component_enabled()) {
      enabled_count++;
    }
  }

  // Data is unchanged if no components are enabled, or if only
  // level_estimator_ and/or voice_detection_ are enabled, since neither
  // of them modifies the audio.
  if (enabled_count == 0) {
    return false;
  } else if (enabled_count == 1) {
    if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
      return false;
    }
  } else if (enabled_count == 2) {
    if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
      return false;
    }
  }
  return true;
}

bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const {
  return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz);
}

bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const {
  if (!stream_data_changed && !voice_detection_->is_enabled()) {
    // At most level_estimator_ is enabled, and it operates on the full-band
    // data, so no split analysis is needed.
    return false;
  } else if (sample_rate_hz_ == kSampleRate32kHz) {
    // Something besides level_estimator_ is enabled, and we have super-wb.
    return true;
  }
  return false;
}

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
int AudioProcessingImpl::WriteMessageToDebugFile() {
  int32_t size = event_msg_->ByteSize();
  if (size <= 0) {
    return kUnspecifiedError;
  }
#if defined(WEBRTC_BIG_ENDIAN)
  // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
  // pretty safe in assuming little-endian.
#endif

  if (!event_msg_->SerializeToString(&event_str_)) {
    return kUnspecifiedError;
  }

  // Write message preceded by its size.
  if (!debug_file_->Write(&size, sizeof(int32_t))) {
    return kFileError;
  }
  if (!debug_file_->Write(event_str_.data(), event_str_.length())) {
    return kFileError;
  }

  event_msg_->Clear();

  return kNoError;
}

int AudioProcessingImpl::WriteInitMessage() {
  event_msg_->set_type(audioproc::Event::INIT);
  audioproc::Init* msg = event_msg_->mutable_init();
  msg->set_sample_rate(sample_rate_hz_);
  msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz());
  msg->set_num_input_channels(num_input_channels_);
  msg->set_num_output_channels(num_output_channels_);
  msg->set_num_reverse_channels(num_reverse_channels_);

  int err = WriteMessageToDebugFile();
  if (err != kNoError) {
    return err;
  }

  return kNoError;
}
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}  // namespace webrtc
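
// Sketch of reading back a debug dump produced by WriteMessageToDebugFile()
// (illustrative only; assumes a little-endian host, matching the TODO in
// that function, and plain stdio rather than FileWrapper). The file is a
// sequence of records, each a 4-byte message size followed by a serialized
// audioproc::Event:
//
//   FILE* f = fopen("apm.aecdump", "rb");
//   int32_t size = 0;
//   std::string buf;
//   while (f != NULL && fread(&size, sizeof(size), 1, f) == 1 && size > 0) {
//     buf.resize(size);
//     if (fread(&buf[0], 1, buf.size(), f) != buf.size()) break;
//     audioproc::Event event;
//     if (!event.ParseFromString(buf)) break;
//     // Dispatch on event.type(): INIT, REVERSE_STREAM or STREAM.
//   }
//   if (f != NULL) fclose(f);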