/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "sola_time_scaler.h"

#include <math.h>

#include <hlogging.h>

#include <algorithm>

#include "ring_buffer.h"

#define FLAGS_sola_ring_buffer 2.0
#define FLAGS_sola_enable_correlation true

using std::max;
using std::min;

namespace video_editing {

// Returns a cross-correlation score for the specified buffers.
int SolaAnalyzer::Correlate(const float* buffer1, const float* buffer2,
                            int num_frames) {
  CHECK(initialized_);

  int score = 0;
  num_frames *= num_channels_;
  while (num_frames-- > 0) {
    // Increment the score if the sign bits match.
    score += ((bit_cast<int32>(*buffer1++) ^ bit_cast<int32>(*buffer2++)) >= 0)
        ? 1 : 0;
  }
  return score;
}


// Trivial SolaAnalyzer class to bypass correlation.
class SolaBypassAnalyzer : public SolaAnalyzer {
 public:
  SolaBypassAnalyzer() { }
  virtual int Correlate(const float*, const float*, int num_frames) {
    return num_frames * num_channels_;
  }
};


// Default constructor.
SolaTimeScaler::SolaTimeScaler()
    : input_buffer_(NULL), output_buffer_(NULL), analyzer_(NULL) {
  sample_rate_ = 0;
  num_channels_ = 0;
  draining_ = false;
  initialized_ = false;
}

SolaTimeScaler::~SolaTimeScaler() {
  delete input_buffer_;
  delete output_buffer_;
  delete analyzer_;
}

// Injects a SolaAnalyzer instance for analyzing signal frames.
void SolaTimeScaler::set_analyzer(SolaAnalyzer* analyzer) {
  MutexLock lock(&mutex_);  // lock out processing while updating
  delete analyzer_;
  analyzer_ = analyzer;
}

// Initializes a SOLA timescaler.
void SolaTimeScaler::Init(double sample_rate, int num_channels,
                          double initial_speed, double window_duration,
                          double overlap_duration) {
  MutexLock lock(&mutex_);  // lock out processing while updating
  sample_rate_ = sample_rate;
  num_channels_ = num_channels;
  speed_ = initial_speed;
  window_duration_ = window_duration;
  overlap_duration_ = overlap_duration;
  initialized_ = true;
  GenerateParameters();
  Reset();
}

// Adjusts the rate scaling factor.
void SolaTimeScaler::set_speed(double speed) {
  MutexLock lock(&mutex_);  // lock out processing while updating
  speed_ = speed;
  GenerateParameters();
}

// Generates processing parameters from the current settings.
void SolaTimeScaler::GenerateParameters() {
  if (speed_ < 0.1) {
    LOGE("Requested speed %fx limited to 0.1x", speed_);
    speed_ = 0.1;
  } else if (speed_ > 8.0) {
    LOGE("Requested speed %fx limited to 8.0x", speed_);
    speed_ = 8.0;
  }
  ratio_ = 1.0 / speed_;
  num_window_frames_ = nearbyint(sample_rate_ * window_duration_);

  // Limit the overlap to half the window size, and round up to an odd number.
  // Half of overlap window (rounded down) is also a useful number.
  overlap_duration_ = min(overlap_duration_, window_duration_ / 2.0);
  num_overlap_frames_ = nearbyint(sample_rate_ * overlap_duration_);
  num_overlap_frames_ |= 1;
  half_overlap_frames_ = num_overlap_frames_ >> 1;

  if (speed_ >= 1.) {
    // For compression (speed up), adjacent input windows overlap in the
    // output.
    input_window_offset_ = num_window_frames_;
    target_merge_offset_ = nearbyint(num_window_frames_ * ratio_);
  } else {
    // For expansion (slow down), each input window start point overlaps the
    // previous, and they are placed adjacently in the output
    // (+/- half the overlap size).
    input_window_offset_ = nearbyint(num_window_frames_ * speed_);
    target_merge_offset_ = num_window_frames_;
  }

  // Make sure we copy enough extra data to be able to perform a
  // frame correlation over the range of target merge point +/- half overlap,
  // even when the previous merge point was adjusted backwards a half overlap.
  max_frames_to_merge_ = max(num_window_frames_,
                             target_merge_offset_ + (2 * num_overlap_frames_));
  min_output_to_hold_ =
      max_frames_to_merge_ + num_overlap_frames_ - target_merge_offset_;
}

// The input buffer has one writer and reader.
// The output buffer has one reader/updater, and one reader/consumer.
static const int kInputReader = 0;
static const int kOutputAnalysis = 0;
static const int kOutputConsumer = 1;

void SolaTimeScaler::Reset() {
  CHECK(initialized_);
  double duration = max(FLAGS_sola_ring_buffer, 20. * window_duration_);
  draining_ = false;

  delete input_buffer_;
  input_buffer_ = new RingBuffer();
  input_buffer_->Init(static_cast<int>(sample_rate_ * duration),
                      num_channels_, 1);

  delete output_buffer_;
  output_buffer_ = new RingBuffer();
  output_buffer_->Init(static_cast<int>(sample_rate_ * ratio_ * duration),
                       num_channels_, 2);

  if (analyzer_ == NULL) {
    if (FLAGS_sola_enable_correlation) {
      analyzer_ = new SolaAnalyzer();
    } else {
      analyzer_ = new SolaBypassAnalyzer();
    }
  }
  analyzer_->Init(sample_rate_, num_channels_);
}

// Returns the number of frames that the input buffer can accept.
int SolaTimeScaler::input_limit() const {
  CHECK(initialized_);
  return input_buffer_->overhead();
}

// Returns the number of available output frames.
int SolaTimeScaler::available() {
  CHECK(initialized_);
  int available = output_buffer_->available(kOutputConsumer);
  if (available > min_output_to_hold_) {
    available -= min_output_to_hold_;
  } else if (draining_) {
    Process();
    available = output_buffer_->available(kOutputConsumer);
    if (available > min_output_to_hold_) {
      available -= min_output_to_hold_;
    }
  } else {
    available = 0;
  }
  return available;
}

void SolaTimeScaler::Drain() {
  CHECK(initialized_);
  draining_ = true;
}

// Feeds audio to the timescaler, and processes as much data as possible.
int SolaTimeScaler::InjectSamples(float* buffer, int num_frames) {
  CHECK(initialized_);
  // Do not write more frames than the buffer can accept.
  num_frames = min(input_limit(), num_frames);
  if (!num_frames) {
    return 0;
  }
  // Copy samples to the input buffer and then process whatever can be
  // consumed.
  input_buffer_->Write(buffer, num_frames);
  Process();
  return num_frames;
}

// Retrieves audio data from the timescaler.
int SolaTimeScaler::RetrieveSamples(float* buffer, int num_frames) {
  CHECK(initialized_);
  // Do not read more frames than available.
  num_frames = min(available(), num_frames);
  if (!num_frames) {
    return 0;
  }
  output_buffer_->Copy(kOutputConsumer, buffer, num_frames);
  output_buffer_->Seek(kOutputConsumer,
                       output_buffer_->Tell(kOutputConsumer) + num_frames);
  return num_frames;
}

// Munges input samples to produce output.
bool SolaTimeScaler::Process() {
  CHECK(initialized_);
  bool generated_data = false;

  // We can only process data if there is sufficient input available
  // (or we are draining the latency), and there is sufficient room
  // for output to be merged.
  while (((input_buffer_->available(kInputReader) > max_frames_to_merge_) ||
          draining_) &&
         (output_buffer_->overhead() >= max_frames_to_merge_)) {
    MutexLock lock(&mutex_);  // lock out updates while processing each window

    // Determine the number of samples to merge into the output.
    int input_count = min(input_buffer_->available(kInputReader),
                          max_frames_to_merge_);
    if (input_count == 0) {
      break;
    }

    // The input reader always points to the next window to process.
    float* input_pointer = input_buffer_->GetPointer(kInputReader,
                                                     input_count);

    // The analysis reader always points to the ideal target merge point,
    // minus half an overlap window (ie, the starting point for correlation).
    // That means the available data from that point equals the number
    // of samples that must be cross-faded.
    int output_merge_cnt = output_buffer_->available(kOutputAnalysis);
    float* output_pointer = output_buffer_->GetPointer(kOutputAnalysis,
                                                       output_merge_cnt);

    // If there is not enough data to do a proper correlation,
    // just merge at the ideal target point.  Otherwise,
    // find the best correlation score, working from the center out.
    int merge_offset = min(output_merge_cnt, half_overlap_frames_);
    if ((output_merge_cnt >= (2 * num_overlap_frames_)) &&
        (input_count >= num_overlap_frames_)) {
      int best_offset = merge_offset;
      int best_score = 0;
      int score;
      for (int i = 0; i <= half_overlap_frames_; ++i) {
        score = analyzer_->Correlate(
            input_pointer,
            output_pointer + ((merge_offset + i) * num_channels_),
            num_overlap_frames_);
        if (score > best_score) {
          best_score = score;
          best_offset = merge_offset + i;
          if (score == (num_overlap_frames_ * num_channels_)) {
            break;  // It doesn't get better than perfect.
          }
        }
        if (i > 0) {
          score = analyzer_->Correlate(
              input_pointer,
              output_pointer + ((merge_offset - i) * num_channels_),
              num_overlap_frames_);
          if (score > best_score) {
            best_score = score;
            best_offset = merge_offset - i;
            if (score == (num_overlap_frames_ * num_channels_)) {
              break;  // It doesn't get better than perfect.
            }
          }
        }
      }
      merge_offset = best_offset;
    } else if ((output_merge_cnt > 0) && !draining_) {
      LOGE("no correlation performed");
    }

    // Crossfade the overlap between input and output, and then
    // copy in the remaining input.
    int crossfade_count = max(0, (output_merge_cnt - merge_offset));
    crossfade_count = min(crossfade_count, input_count);
    int remaining_count = input_count - crossfade_count;
    float* merge_pointer = output_pointer + (merge_offset * num_channels_);
    float flt_count = static_cast<float>(crossfade_count);
    for (int i = 0; i < crossfade_count; ++i) {
      // Linear cross-fade, for now.
      float input_scale = static_cast<float>(i) / flt_count;
      float output_scale = 1. - input_scale;
      for (int j = 0; j < num_channels_; ++j) {
        *merge_pointer = (*merge_pointer * output_scale) +
                         (*input_pointer++ * input_scale);
        ++merge_pointer;
      }
    }

    // Copy the merged buffer back into the output, if necessary, and
    // append the rest of the window.
    output_buffer_->MergeBack(kOutputAnalysis, output_pointer,
                              output_merge_cnt);
    output_buffer_->Write(input_pointer, remaining_count);

    // Advance the output analysis pointer to the next target merge point,
    // minus half an overlap window.  The target merge point is always
    // calculated as a delta from the previous ideal target, not the actual
    // target, to avoid drift.
    int output_advance = target_merge_offset_;
    if (output_merge_cnt < half_overlap_frames_) {
      // On the first window, back up the pointer for the next correlation.
      // Thereafter, that compensation is preserved.
      output_advance -= half_overlap_frames_;
    }

    // Don't advance beyond the available data, when finishing up.
    if (draining_) {
      output_advance = min(output_advance,
                           output_buffer_->available(kOutputAnalysis));
    }
    output_buffer_->Seek(kOutputAnalysis,
                         output_buffer_->Tell(kOutputAnalysis) +
                         output_advance);

    // Advance the input pointer beyond the frames that are no longer needed.
    input_buffer_->Seek(kInputReader,
                        input_buffer_->Tell(kInputReader) +
                        min(input_count, input_window_offset_));

    if ((crossfade_count + remaining_count) > 0) {
      generated_data = true;
    }
  }  // while (more to process)

  return generated_data;
}

}  // namespace video_editing
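
// ---------------------------------------------------------------------------
// Illustrative usage sketch of the SolaTimeScaler API defined above, showing
// the typical Init / InjectSamples / RetrieveSamples / Drain sequence.  The
// 44.1 kHz mono format, the 30 ms window, the 10 ms overlap, the 2x speed,
// and the FillSource() / ConsumeOutput() helpers are assumptions made only
// for this example, not part of the library.
//
//   #include "sola_time_scaler.h"
//
//   void ExampleTimeScale() {
//     video_editing::SolaTimeScaler scaler;
//     scaler.Init(44100.0 /* sample rate */, 1 /* channels */,
//                 2.0 /* speed */, 0.030 /* window s */,
//                 0.010 /* overlap s */);
//
//     float in[1024];
//     float out[1024];
//     while (FillSource(in, 1024)) {            // hypothetical audio source
//       // InjectSamples() may accept fewer frames than offered when the
//       // input ring buffer is nearly full; a real caller should re-offer
//       // the remainder (see input_limit()).
//       scaler.InjectSamples(in, 1024);
//       int got = scaler.RetrieveSamples(out, 1024);
//       ConsumeOutput(out, got);                // hypothetical audio sink
//     }
//
//     // Flush the latency held inside the scaler.
//     scaler.Drain();
//     int got;
//     while ((got = scaler.RetrieveSamples(out, 1024)) > 0) {
//       ConsumeOutput(out, got);
//     }
//   }
// ---------------------------------------------------------------------------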