webrtc视频帧率控制算法机制(一)--目标码率丢帧

前言

本篇文章的丢帧是依据编码后的码率和目标码率来决定丢帧,
而下一篇文章介绍的丢帧依据是目标帧率。

http://www.jianshu.com/p/fe303bdabc26

由此可对丢帧策略分类如下:
  • 编码后的码率和目标码率来决定丢帧
  • 目标帧率决定丢帧

整个帧率控制多次使用的算法---指数权重滤波(暂且如此命名)

在exp_filter.cc文件中:

#include "webrtc/base/exp_filter.h"
#include <math.h>
namespace rtc {

// Sentinel meaning "no value": used both for an uninitialised filter state
// and for an unset upper bound.
const float ExpFilter::kValueUndefined = -1.0f;

// Installs a new base alpha and forgets the filtered state, so that the next
// Apply() call seeds the filter with its sample.
void ExpFilter::Reset(float alpha) {
  filtered_ = kValueUndefined;
  alpha_ = alpha;
}

// Feeds one sample into the filter and returns the updated filtered value.
//   exp    - exponent applied to the base alpha; 1.0 uses the base as-is.
//   sample - newest observation.
// The update is  filtered = a * filtered + (1 - a) * sample  with
// a = alpha_ ^ exp, after which the result is clamped to max_ when a maximum
// has been configured.
float ExpFilter::Apply(float exp, float sample) {
  if (filtered_ == kValueUndefined) {
    // First sample after construction/Reset(): adopt it directly.
    filtered_ = sample;
  } else {
    // Skip the pow() call on the common exp == 1 path.
    float weight = alpha_;
    if (exp != 1.0) {
      weight = pow(alpha_, exp);
    }
    filtered_ = weight * filtered_ + (1 - weight) * sample;
  }

  const bool has_upper_bound = max_ != kValueUndefined;
  if (has_upper_bound && filtered_ > max_) {
    filtered_ = max_;
  }
  return filtered_;
}

// Replaces the base alpha while keeping the current filtered value.
void ExpFilter::UpdateBase(float alpha) {
  alpha_ = alpha;
}

}  // namespace rtc

这个文件的大概思想就是对历史值和当前值做指数加权求和。公式为:

f(x)=alpha*f(x-1)+(1-alpha)*sample;
alpha=pow(alpha_, exp);

其中alpha_为设定常量,exp为幂次方,sample为最新样点值。
后面还有:

f(x)=min(f(x),max);即不要超过max。

调用丢帧

// Decides whether the frame about to be encoded should be dropped.
// Refreshes the incoming frame-rate estimate, leaks the frame dropper's
// bucket using that rate (rounded to the nearest integer), and then either
// drops unconditionally while video is suspended or defers to the frame
// dropper's own decision.
bool MediaOptimization::DropFrame() {
  CriticalSectionScoped lock(crit_sect_.get());
  UpdateIncomingFrameRate();

  // Leak appropriate number of bytes.
  const uint32_t rounded_input_fps =
      static_cast<uint32_t>(InputFrameRateInternal() + 0.5f);
  frame_dropper_->Leak(rounded_input_fps);

  // Drop all frames when muted; otherwise let the dropper decide.
  return video_suspended_ || frame_dropper_->DropFrame();
}

解释:

  • UpdateIncomingFrameRate();更新采集出来的帧率。
  • frame_dropper_->Leak((uint32_t)(InputFrameRateInternal() + 0.5f));这里主要利用采集帧率,去更新丢帧比率等关键丢帧信息。
  • return frame_dropper_->DropFrame();这里就是根据前面计算的丢帧比率等去实现均匀丢帧。
    这些函数的具体实现后面会一一介绍。

更新采集出来的帧率

// Records the arrival time of a new input frame in the history window
// (newest entry at index 0) and recomputes the incoming frame rate.
void MediaOptimization::UpdateIncomingFrameRate() {
  const int64_t now = clock_->TimeInMilliseconds();

  // A zero in slot 0 means nothing has been recorded yet, so there is
  // nothing to shift.
  const bool has_history = incoming_frame_times_[0] != 0;
  if (has_history) {
    // Slide every entry one slot towards the back; the oldest falls off.
    int32_t dst = kFrameCountHistorySize - 1;
    while (dst > 0) {
      incoming_frame_times_[dst] = incoming_frame_times_[dst - 1];
      --dst;
    }
  }

  incoming_frame_times_[0] = now;
  ProcessIncomingFrameRate(now);
}
// Estimates the incoming frame rate as (frame count) / (elapsed time).
// Walks the arrival-time history starting at index 1 and stops at the first
// empty slot or the first entry older than kFrameHistoryWinMs relative to
// |now|. When at least one usable entry is found, the rate is the number of
// counted frames divided by the time since the oldest counted one; it falls
// back to 1.0 when that time span is not positive.
void MediaOptimization::ProcessIncomingFrameRate(int64_t now) {
  int32_t num = 1;
  int32_t nr_of_frames = 0;
  while (num < (kFrameCountHistorySize - 1)) {
    const bool slot_empty = incoming_frame_times_[num] <= 0;
    // don't use data older than 2 s
    if (slot_empty || now - incoming_frame_times_[num] > kFrameHistoryWinMs) {
      break;
    }
    ++nr_of_frames;
    ++num;
  }

  if (num <= 1) {
    // No usable history entry: leave the previous estimate untouched.
    return;
  }

  const int64_t diff = now - incoming_frame_times_[num - 1];
  if (diff > 0) {
    incoming_frame_rate_ = nr_of_frames * 1000.0f / static_cast<float>(diff);
  } else {
    incoming_frame_rate_ = 1.0;
  }
}

解释:
这一段比较好理解,就是根据每一帧到来的时间,最多2秒钟的统计,利用公式:
incoming_frame_rate_ = nr_of_frames * 1000.0f / static_cast<float>(diff);
得到这一段时间的采集帧率。
对于统计数据,

  for (int32_t i = (kFrameCountHistorySize - 2); i >= 0; i--) {
     incoming_frame_times_[i + 1] = incoming_frame_times_[i];
   }

可见这是一个滑动窗口,即总是用最新的kFrameCountHistorySize 大小的数据。

丢帧算法主要实现

丢帧算法全部在frame_dropper.cc文件中,下面先通过代码解读,再细说算法实现。
此为frame_dropper.cc文件内容,及注释

/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/video_coding/utility/include/frame_dropper.h"

#include "webrtc/system_wrappers/interface/trace.h"

namespace webrtc
{

// Default tuning values used by the FrameDropper constructors below.

// Value handed to the _keyFrameSizeAvgKbits filter on construction.
// NOTE(review): despite "KBits" in the name this looks like a filter alpha
// (Reset() re-initialises the same filter with alpha 0.9f) — confirm against
// the ExpFilter constructor.
const float kDefaultKeyFrameSizeAvgKBits = 0.9f;
// Value handed to the _keyFrameRatio filter on construction (presumably its
// alpha; Reset() uses the same 0.99f as alpha).
const float kDefaultKeyFrameRatio = 0.99f;
// First constructor argument of the _dropRatio filter (presumably its alpha).
const float kDefaultDropRatioAlpha = 0.9f;
// Second constructor argument of the _dropRatio filter (presumably the upper
// bound of the filtered drop ratio — confirm against ExpFilter).
const float kDefaultDropRatioMax = 0.96f;
// Default for _max_time_drops.
const float kDefaultMaxTimeToDropFrames = 4.0f;  // In seconds.

// Builds a frame dropper with the default filter parameters and the default
// drop-time cap, enabled from the start. All remaining state is initialised
// by Reset().
FrameDropper::FrameDropper()
    : _keyFrameSizeAvgKbits(kDefaultKeyFrameSizeAvgKBits),
      _keyFrameRatio(kDefaultKeyFrameRatio),
      _dropRatio(kDefaultDropRatioAlpha, kDefaultDropRatioMax),
      _enabled(true),
      _max_time_drops(kDefaultMaxTimeToDropFrames) {
    Reset();
}

// Builds a frame dropper whose cap on consecutive drop time is supplied by
// the caller.
//   max_time_drops - upper bound, in seconds, on how long frames may be
//                    dropped between two kept frames.
// All remaining state is initialised by Reset().
FrameDropper::FrameDropper(float max_time_drops)
:
_keyFrameSizeAvgKbits(kDefaultKeyFrameSizeAvgKBits),
_keyFrameRatio(kDefaultKeyFrameRatio),
_dropRatio(kDefaultDropRatioAlpha, kDefaultDropRatioMax),
_enabled(true),
_max_time_drops(max_time_drops)
{
    Reset();
    // Reset() unconditionally restores _max_time_drops to its 4.0f default,
    // which would silently discard the caller's value; re-apply it here.
    // NOTE(review): a later explicit Reset() call still reverts to the
    // default — confirm whether that is intended.
    _max_time_drops = max_time_drops;
}

// Restores the dropper to its start-up state: default rate assumptions, an
// empty bucket, re-seeded filters, and normal (non-aggressive) mode.
void
FrameDropper::Reset()
{
    // Assumed rates until SetRates() is called.
    _targetBitRate = 300.0f;
    _incoming_frame_rate = 30;
    _windowSize = 0.5f;
    _accumulatorMax = 150.0f; // assume 300 kb/s and 0.5 s window
    // Cap for the encoder buffer level/accumulator, in secs.
    _cap_buffer_size = 3.0f;
    // Cap on maximum amount of dropped frames between kept frames, in secs.
    _max_time_drops = 4.0f;

    // Key-frame statistics.
    _keyFrameSizeAvgKbits.Reset(0.9f);
    _keyFrameRatio.Reset(0.99f);
    _keyFrameRatio.Apply(1.0f, 1.0f/300.0f); // 1 key frame every 10th second in 30 fps
    _keyFrameCount = 0;
    _keyFrameSpreadFrames = 0.5f * _incoming_frame_rate;

    // Bucket level and drop bookkeeping.
    _accumulator = 0.0f;
    _wasBelowMax = true;
    _dropNext = false;
    _dropCount = 0;
    _dropRatio.Reset(0.9f);
    _dropRatio.Apply(0.0f, 0.0f); // Initialize to 0

    _fastMode = false; // start with normal (non-aggressive) mode
}

// Switches the frame dropper on or off. While disabled, Fill(), Leak() and
// UpdateNack() return without doing anything and DropFrame() returns false.
void FrameDropper::Enable(bool enable) { _enabled = enable; }

//deltaFrame : 0:key frame 1:P frame
void
FrameDropper::Fill(size_t frameSizeBytes, bool deltaFrame)
{
    if (!_enabled)
    {
        return;
    }
    float frameSizeKbits = 8.0f * static_cast<float>(frameSizeBytes) / 1000.0f;
    if (!deltaFrame && !_fastMode) // fast mode does not treat key-frames any different//非fast_mode而且key_frame
    {
        //exp=1.0时,filtered_ = alpha_ * filtered_ + (1 - alpha_) * sample;当alpha_=0.8或0.9时,则更偏重于历史值,而非当前sample
        _keyFrameSizeAvgKbits.Apply(1, frameSizeKbits);
        _keyFrameRatio.Apply(1.0, 1.0);//_keyFrameRatio同样偏重于历史值,而当前值设置为1,因为当前为key frame ,所以值为1
        if (frameSizeKbits > _keyFrameSizeAvgKbits.filtered())//当前值大于均值
        {
            // Remove the average key frame size since we
            // compensate for key frames when adding delta
            // frames.
            frameSizeKbits -= _keyFrameSizeAvgKbits.filtered();//超出均值的部分
        }
        else
        {
            // Shouldn't be negative, so zero is the lower bound.
            frameSizeKbits = 0;
        }
        if (_keyFrameRatio.filtered() > 1e-5 &&
            1 / _keyFrameRatio.filtered() < _keyFrameSpreadFrames)   //_keyFrameSpreadFrames = 0.5f * inputFrameRate;
        {
            // We are sending key frames more often than our upper bound for
            // how much we allow the key frame compensation to be spread
            // out in time. Therefor we must use the key frame ratio rather
            // than keyFrameSpreadFrames.
            _keyFrameCount =
                static_cast<int32_t>(1 / _keyFrameRatio.filtered() + 0.5);//每一秒关键帧的数量?
        }
        else
        {
            // Compensate for the key frame the following frames
            _keyFrameCount = static_cast<int32_t>(_keyFrameSpreadFrames + 0.5);
        }
    }
    else
    {
        // Decrease the keyFrameRatio
        _keyFrameRatio.Apply(1.0, 0.0);//因为这是P帧,降低_keyFrameRatio的fileter值,因为sample=0
    }
    // Change the level of the accumulator (bucket)
    _accumulator += frameSizeKbits; //_accumulator是frameSizeKbits的累加器,表示超过均值的bit值累加
    CapAccumulator();//max_accumulator = _targetBitRate * _cap_buffer_size;累加器最多为max_accumulator,3倍目标码率
}

// Drains one frame's worth of budget from the bucket and refreshes the drop
// ratio.
//   inputFrameRate - current input frame rate; values below 1 are ignored.
// Does nothing when the dropper is disabled or the target bit rate is
// negative.
void
FrameDropper::Leak(uint32_t inputFrameRate)
{
    const bool skip =
        !_enabled || inputFrameRate < 1 || _targetBitRate < 0.0f;
    if (skip)
    {
        return;
    }

    _keyFrameSpreadFrames = 0.5f * inputFrameRate;

    // Expected bits per frame (target). If all frames were the same size we
    // would get this many bits per frame. While key-frame compensation is
    // pending, the budget is reduced by the key-frame share below.
    float perFrameBudget = _targetBitRate / inputFrameRate;
    if (_keyFrameCount > 0)
    {
        // Perform the key frame compensation: either the average key-frame
        // cost per frame (avg size * key-frame ratio) when key frames are
        // frequent, or the average size spread over _keyFrameSpreadFrames.
        const bool keyFramesFrequent =
            _keyFrameRatio.filtered() > 0 &&
            1 / _keyFrameRatio.filtered() < _keyFrameSpreadFrames;
        perFrameBudget -= keyFramesFrequent
            ? _keyFrameSizeAvgKbits.filtered() * _keyFrameRatio.filtered()
            : _keyFrameSizeAvgKbits.filtered() / _keyFrameSpreadFrames;
        // One more frame has carried its share of the compensation.
        _keyFrameCount--;
    }

    // The bucket grows in Fill() after encoding and shrinks here before
    // encoding; it never goes below empty.
    _accumulator -= perFrameBudget;
    if (_accumulator < 0.0f)
    {
        _accumulator = 0.0f;
    }
    UpdateRatio();
}

// Adds |nackBytes| (converted from bytes to kilobits) to the bucket.
// No-op while the dropper is disabled.
void
FrameDropper::UpdateNack(uint32_t nackBytes)
{
    if (_enabled)
    {
        const float nackKbits = static_cast<float>(nackBytes) * 8.0f / 1000.0f;
        _accumulator += nackKbits;
    }
}

// Adjusts the bucket level by the net amount (inKbits - outKbits).
// Unlike Fill()/Leak(), this does not check whether the dropper is enabled
// and applies no cap or floor.
void
FrameDropper::FillBucket(float inKbits, float outKbits)
{
    const float netKbits = inKbits - outKbits;
    _accumulator += netKbits;
}

// Updates the filtered drop ratio from the current bucket level.
// A bucket above _accumulatorMax feeds a 1 ("drop") sample into the filter,
// anything else feeds a 0 ("keep") sample. When the level is more than 30%
// above the maximum the filter base is lowered to 0.8 so that the new sample
// weighs more.
void
FrameDropper::UpdateRatio()
{
    // Too far above accumulator max: react faster. Otherwise go back to the
    // normal reaction speed.
    const bool wayAboveMax = _accumulator > 1.3f * _accumulatorMax;
    _dropRatio.UpdateBase(wayAboveMax ? 0.8f : 0.9f);

    const bool aboveMax = _accumulator > _accumulatorMax;
    if (!aboveMax)
    {
        _dropRatio.Apply(1.0f, 0.0f);
    }
    else
    {
        // We are above accumulator max, and should ideally drop a frame.
        // Flag the next frame when the level has just crossed the maximum,
        // and always in aggressive (fast) mode.
        if (_wasBelowMax || _fastMode)
        {
            _dropNext = true;
        }

        // Increase the dropRatio, then restore the normal filter base.
        _dropRatio.Apply(1.0f, 1.0f);
        _dropRatio.UpdateBase(0.9f);
    }

    // Remember on which side of the maximum this update ended.
    _wasBelowMax = _accumulator < _accumulatorMax;
}

// This function signals when to drop frames to the caller. It makes use of the dropRatio
// to smooth out the drops over time.
//
// Returns true when the caller should drop the current frame, false when it
// should keep it. Always returns false while the dropper is disabled.
//
// _dropCount is a signed counter whose sign encodes the current mode:
//   - ratio >= 0.5 ("drops per keep"): _dropCount counts up from 0 to a
//     positive limit; each step drops a frame, then one frame is kept.
//   - 0 < ratio < 0.5 ("keeps per drop"): _dropCount counts down from 0 to a
//     negative limit; the step leaving 0 drops a frame, the rest are kept.
//   - otherwise the counter is cleared and the frame is kept.
bool
FrameDropper::DropFrame()
{
    if (!_enabled)
    {
        return false;
    }
    // UpdateRatio() sets _dropNext when the bucket crosses its maximum (or
    // always in fast mode); consuming it restarts the drop cycle at 0.
    if (_dropNext)
    {
        _dropNext = false;
        _dropCount = 0;
    }

    if (_dropRatio.filtered() >= 0.5f) // Drops per keep: at least one drop for every kept frame
    {
        // limit is the number of frames we should drop between each kept frame
        // to keep our drop ratio. limit is positive in this case.
        float denom = 1.0f - _dropRatio.filtered();// denominator: the fraction of frames that is kept
        if (denom < 1e-5)
        {
            denom = (float)1e-5;
        }
        int32_t limit = static_cast<int32_t>(1.0f / denom - 1.0f + 0.5f);// round(1/keep_ratio - 1): drops to make per kept frame
        // Put a bound on the max amount of dropped frames between each kept
        // frame, in terms of frame rate and window size (secs).
        int max_limit = static_cast<int>(_incoming_frame_rate *
                                         _max_time_drops);// frame rate times _max_time_drops (seconds)
        if (limit > max_limit) {
          limit = max_limit;
        }
        if (_dropCount < 0)// negative means the counter was last used in "keeps per drop" mode
        {
            // Reset the _dropCount since it was negative and should be positive.
            // NOTE(review): the enclosing branch already requires a ratio
            // >= 0.5, so this > 0.4 test looks always-true unless filtered()
            // can change between calls — confirm.
            if (_dropRatio.filtered() > 0.4f)
            {
                _dropCount = -_dropCount;
            }
            else
            {
                _dropCount = 0;
            }
        }
        if (_dropCount < limit)// keep dropping until limit frames have been dropped
        {
            // As long we are below the limit we should drop frames.
            _dropCount++;
            return true;
        }
        else
        {
            // Only when we reset _dropCount a frame should be kept.
            _dropCount = 0;
            return false;
        }
    }
    else if (_dropRatio.filtered() > 0.0f &&
        _dropRatio.filtered() < 0.5f) // Keeps per drop: one drop followed by a run of kept frames
    {
        // limit is the number of frames we should keep between each drop
        // in order to keep the drop ratio. limit is negative in this case,
        // and the _dropCount is also negative.
        float denom = _dropRatio.filtered();
        if (denom < 1e-5)
        {
            denom = (float)1e-5;
        }
        int32_t limit = -static_cast<int32_t>(1.0f / denom - 1.0f + 0.5f);
        if (_dropCount > 0)
        {
            // Reset the _dropCount since we have a positive
            // _dropCount, and it should be negative.
            // NOTE(review): the enclosing branch already requires a ratio
            // < 0.5, so this < 0.6 test looks always-true unless filtered()
            // can change between calls — confirm.
            if (_dropRatio.filtered() < 0.6f)
            {
                _dropCount = -_dropCount;
            }
            else
            {
                _dropCount = 0;
            }
        }
        if (_dropCount > limit)
        {
            if (_dropCount == 0)
            {
                // Drop frames when we reset _dropCount.
                _dropCount--;
                return true;// drop: exactly one frame is dropped per cycle
            }
            else
            {
                // Keep frames as long as we haven't reached limit.
                _dropCount--;
                return false;// keep; once limit is reached the counter is reset to 0 below and a new cycle starts
            }
        }
        else
        {
            _dropCount = 0;
            return false;
        }
    }
    // Ratio is not positive: nothing to drop, clear the counter.
    _dropCount = 0;
    return false;

    // A simpler version, unfiltered and quicker
    //bool dropNext = _dropNext;
    //_dropNext = false;
    //return dropNext;
}

// Installs a new target bit rate and incoming frame rate.
//   bitRate             - new target bit rate; -1 means infinite bandwidth.
//   incoming_frame_rate - new input frame rate.
// The bucket maximum becomes bitRate * _windowSize. When the target is
// lowered while the bucket is above the new maximum, the bucket level is
// scaled down by the same factor as the bit rate.
void
FrameDropper::SetRates(float bitRate, float incoming_frame_rate)
{
    _incoming_frame_rate = incoming_frame_rate;

    // Bit rate of -1 means infinite bandwidth.
    _accumulatorMax = bitRate * _windowSize; // bitRate * windowSize (in seconds)

    const bool targetLowered = _targetBitRate > 0.0f && bitRate < _targetBitRate;
    if (targetLowered && _accumulator > _accumulatorMax)
    {
        // Rescale the accumulator level if the accumulator max decreases
        _accumulator = bitRate / _targetBitRate * _accumulator;
    }

    _targetBitRate = bitRate;
    CapAccumulator();
}

// Returns the frame rate expected after dropping: the input rate scaled by
// the fraction of frames that is kept (1 - filtered drop ratio). While the
// dropper is disabled the input rate is returned unchanged.
float
FrameDropper::ActualFrameRate(uint32_t inputFrameRate) const
{
    if (_enabled)
    {
        const float keepRatio = 1.0f - _dropRatio.filtered();
        return inputFrameRate * keepRatio;
    }
    return static_cast<float>(inputFrameRate);
}

// Put a cap on the accumulator, i.e., don't let it grow beyond some level.
// This is a temporary fix for screencasting where very large frames from
// encoder will cause very slow response (too many frame drops).
void FrameDropper::CapAccumulator() {
  float max_accumulator = _targetBitRate * _cap_buffer_size;
  if (_accumulator > max_accumulator) {
    _accumulator = max_accumulator;
  }
}

}

1、丢帧的决定因素在_dropRatio.Apply(1.0f, 1.0f);通过给_dropRatio赋值,使得_dropRatio不为0.而_dropRatio.Apply(1.0f, 1.0f);调用的起因,还在

int32_t VCMEncodedFrameCallback::Encoded
->int32_t MediaOptimization::UpdateWithEncodedData
->FrameDropper::Fill(size_t frameSizeBytes, bool deltaFrame)

通过Fill函数中的_accumulator(累加器),再通过

FrameDropper::Leak(uint32_t inputFrameRate)
->FrameDropper::UpdateRatio()

来最终调用_dropRatio.Apply(1.0f, 1.0f)或_dropRatio.Apply(1.0f, 0.0f)

2、丢帧的方法
在FrameDropper::DropFrame()函数中,通过上面注释的代码也可以理解。

drop.png

就是当dropRatio>=0.5时,两个帧之间可能丢多个;当dropRatio<0.5时,两个帧之间最多丢一个。

3、调用丢帧的地方

  • int32_t VideoSender::AddVideoFrame()帧数据加入encoder之前

4、如何从_accumulator控制帧率

  • FrameDropper::Fill()中,每编码完一帧数据,就将数据的大小累加到_accumulator,其中P帧全部累加,K帧只加超出均值的部分。
  • 每个采集后,即将给到编码器的帧,利用_targetBitRate / inputFrameRate;得到每一帧期望占用的bit大小,其中K帧单独计算:
    _keyFrameSizeAvgKbits.filtered() * _keyFrameRatio.filtered();
疑问:
为什么_accumulator累加时,K帧只加超出均值的部分,而不是全部?
(从Fill()中的原注释和Leak()的实现来看:K帧的均值部分并没有被忽略,而是在Leak()里通过减小后续若干帧的T,分摊到后面的帧中去补偿。)

5、什么时候丢帧
_accumulator > _accumulatorMax;
其中,_accumulatorMax = bitRate * _windowSize;(_windowSize=0.5f)

## 编码完后,更新_accumulator
这一部分只是说明编码完后怎么去更新_accumulator 的流程,比较容易看懂。
```
// Encoder output callback: forwards an encoded image to the send callback
// and then updates the media-optimization statistics.
// Returns VCM_UNINITIALIZED when no send callback is registered, the send
// callback's result when that is negative, the media optimizer's drop
// decision for internal sources, and VCM_OK otherwise.
int32_t VCMEncodedFrameCallback::Encoded(
    const EncodedImage& encodedImage,
    const CodecSpecificInfo* codecSpecificInfo,
    const RTPFragmentationHeader* fragmentationHeader) {
  post_encode_callback_->Encoded(encodedImage, NULL, NULL);

  if (_sendCallback == NULL) {
    return VCM_UNINITIALIZED;
  }

  // Build a zero-initialised RTP video header and let CopyCodecSpecific
  // populate it (or adjust the pointer) from the codec-specific info.
  RTPVideoHeader rtpVideoHeader;
  memset(&rtpVideoHeader, 0, sizeof(rtpVideoHeader));
  RTPVideoHeader* rtpVideoHeaderPtr = &rtpVideoHeader;
  CopyCodecSpecific(codecSpecificInfo, &rtpVideoHeaderPtr);

  const int32_t sendResult = _sendCallback->SendData(
      _payloadType, encodedImage, *fragmentationHeader, rtpVideoHeaderPtr);
  if (sendResult < 0) {
    return sendResult;
  }

  if (_mediaOpt == NULL) {
    return VCM_OK;
  }

  // Update the post-encode statistics (this is what feeds the frame
  // dropper's bucket).
  _mediaOpt->UpdateWithEncodedData(encodedImage);

  if (_internalSource) {
    return _mediaOpt->DropFrame();  // Signal to encoder to drop next frame.
  }
  return VCM_OK;
}
```

```
// Updates all post-encode statistics with one encoded image: the encoded
// frame sample list, the sent bitrate/framerate, the frame dropper's bucket,
// the loss-protection inputs, the quality-mode input, and the key/delta
// frame counters. Always returns VCM_OK.
int32_t MediaOptimization::UpdateWithEncodedData(
    const EncodedImage& encoded_image) {
  size_t encoded_length = encoded_image._length;
  uint32_t timestamp = encoded_image._timeStamp;
  CriticalSectionScoped lock(crit_sect_.get());
  const int64_t now_ms = clock_->TimeInMilliseconds();
  PurgeOldFrameSamples(now_ms);

  // Frames having the same timestamp are generated from the same input
  // frame. We don't want to double count them, but only increment the
  // size_bytes.
  const bool same_input_frame =
      !encoded_frame_samples_.empty() &&
      encoded_frame_samples_.back().timestamp == timestamp;
  if (!same_input_frame) {
    encoded_frame_samples_.push_back(
        EncodedFrameSample(encoded_length, timestamp, now_ms));
  } else {
    encoded_frame_samples_.back().size_bytes += encoded_length;
    encoded_frame_samples_.back().time_complete_ms = now_ms;
  }

  UpdateSentBitrate(now_ms);
  UpdateSentFramerate();

  if (encoded_length == 0) {
    // Nothing was produced for this frame; only the rate stats are updated.
    return VCM_OK;
  }

  // false: key frame, true: delta (P) frame.
  const bool delta_frame = encoded_image._frameType != kKeyFrame;

  // Feed the encoded size of every frame into the frame dropper's bucket.
  frame_dropper_->Fill(encoded_length, delta_frame);

  if (max_payload_size_ > 0) {
    const float min_packets_per_frame =
        encoded_length / static_cast<float>(max_payload_size_);
    if (delta_frame) {
      loss_prot_logic_->UpdatePacketsPerFrame(min_packets_per_frame,
                                              clock_->TimeInMilliseconds());
    } else {
      loss_prot_logic_->UpdatePacketsPerFrameKey(
          min_packets_per_frame, clock_->TimeInMilliseconds());
    }

    if (enable_qm_) {
      // Update quality select with encoded length.
      qm_resolution_->UpdateEncodedSize(encoded_length);
    }
  }

  // Updating counters (and the key-frame size for key frames).
  if (delta_frame) {
    delta_frame_cnt_++;
  } else {
    loss_prot_logic_->UpdateKeyFrameSize(static_cast<float>(encoded_length));
    key_frame_cnt_++;
  }

  return VCM_OK;
}
```

解释:
编码完后的数据都是经过callback回调的,
```
int32_t VCMEncodedFrameCallback::Encoded
->int32_t MediaOptimization::UpdateWithEncodedData
->frame_dropper_->Fill(encoded_length, delta_frame);
```
经过这个流程,每次编码后,送给发送的数据都要去更新frame_dropper_。

后记:
作者对于这一个算法的机制原理,也不是很明白,只能从代码中体会算法实现,不免有错误理解,如有更好理解或者不同见解的道友,敬请赐教,不胜感激!

推荐阅读更多精彩内容